TextEncoder polyfill

Apparently Safari doesn't sport a TextEncoder, so here's a polyfill for it.
This commit is contained in:
Richard van der Hoff 2017-01-12 11:45:47 +00:00
parent 07b3c58c61
commit 1d5d44d63d
4 changed files with 333 additions and 0 deletions

View file

@ -0,0 +1,131 @@
/*
Copyright 2017 Vector Creations Ltd
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
"use strict";
// Polyfill for TextDecoder.
const REPLACEMENT_CHAR = '\uFFFD';

// Highest code point defined by Unicode; anything above it is invalid.
const MAX_CODE_POINT = 0x10FFFF;

export default class TextDecoder {
    /**
     * Decode a UTF-8 byte array as a javascript string.
     *
     * Malformed input never throws. Each byte of an invalid sequence
     * (stray continuation byte, truncated sequence, over-long encoding,
     * encoded surrogate, or code point above U+10FFFF) is replaced by one
     * U+FFFD REPLACEMENT CHARACTER. A byte which interrupts a multi-byte
     * sequence is not swallowed: decoding resumes at that byte, so valid
     * text following a truncated sequence is preserved.
     *
     * A byte-order mark at the very start of the input is dropped; one
     * found anywhere else is kept as U+FEFF.
     *
     * @param {Uint8Array} u8Array UTF-8-encoded input
     * @return {string} the decoded string
     */
    decode(u8Array) {
        let str = '';
        let idx = 0;

        while (idx < u8Array.length) {
            const start = idx;
            const u0 = u8Array[idx++];

            if (u0 < 0x80) {
                // plain ASCII
                str += String.fromCharCode(u0);
                continue;
            }

            if (u0 < 0xC0 || u0 > 0xF4) {
                // Either a continuation byte where we expect a leading
                // byte, or a leading byte which would start a sequence
                // beyond U+10FFFF (including the obsolete 5- and 6-byte
                // forms).
                str += REPLACEMENT_CHAR;
                continue;
            }

            // Work out how many continuation bytes follow, the smallest
            // code point which legitimately needs that many bytes (anything
            // smaller is an over-long encoding), and the payload bits
            // carried by the leading byte.
            let trailing;
            let minimum;
            let u;
            if (u0 < 0xE0) {
                trailing = 1;
                minimum = 0x80;
                u = u0 & 0x1F;
            } else if (u0 < 0xF0) {
                trailing = 2;
                minimum = 0x800;
                u = u0 & 0x0F;
            } else {
                trailing = 3;
                minimum = 0x10000;
                u = u0 & 0x07;
            }

            let complete = true;
            for (let n = 0; n < trailing; n++) {
                const next = u8Array[idx];
                if (next === undefined || (next & 0xC0) !== 0x80) {
                    // End of input, or a byte which is not a continuation
                    // byte. Leave it unconsumed so that the next iteration
                    // decodes it in its own right.
                    complete = false;
                    break;
                }
                u = (u << 6) | (next & 0x3F);
                idx++;
            }

            const isSurrogate = u >= 0xD800 && u <= 0xDFFF;
            if (!complete || u < minimum || u > MAX_CODE_POINT || isSurrogate) {
                // one replacement character per byte consumed
                str += REPLACEMENT_CHAR.repeat(idx - start);
                continue;
            }

            if (u === 0xFEFF && start === 0) {
                // byte-order mark: do not add to output
                continue;
            }

            if (u < 0x10000) {
                str += String.fromCharCode(u);
            } else {
                // encode as a utf-16 surrogate pair
                const v = u - 0x10000;
                str += String.fromCharCode(0xD800 | (v >> 10), 0xDC00 | (v & 0x3FF));
            }
        }

        return str;
    }
}

View file

@ -0,0 +1,78 @@
/*
Copyright 2017 Vector Creations Ltd
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
"use strict";
// Polyfill for TextEncoder. Based on emscripten's stringToUTF8Array.
/**
 * Read the Unicode scalar value which starts at UTF-16 code unit `i`.
 *
 * A high surrogate immediately followed by a low surrogate is combined into
 * the supplementary-plane code point they represent (the caller must then
 * skip the second code unit). Any unpaired surrogate yields U+FFFD, since a
 * lone surrogate has no valid UTF-8 encoding.
 *
 * @param {string} str String being encoded
 * @param {number} i Index of the code unit to read
 * @return {number} a code point in 0..0x10FFFF, never a surrogate
 */
function scalarValueAt(str, i) {
    const u = str.charCodeAt(i);
    if (u < 0xD800 || u > 0xDFFF) {
        return u;
    }
    if (u <= 0xDBFF && i + 1 < str.length) {
        const low = str.charCodeAt(i + 1);
        if (low >= 0xDC00 && low <= 0xDFFF) {
            return 0x10000 + ((u & 0x3FF) << 10) + (low & 0x3FF);
        }
    }
    // unpaired surrogate
    return 0xFFFD;
}

/**
 * Count the bytes needed to encode a string as UTF-8.
 *
 * Must stay in step with TextEncoder.encode, which uses the result to size
 * its output buffer.
 *
 * @param {string} str String to measure
 * @return {number} length of the UTF-8 encoding, in bytes
 */
function utf8len(str) {
    let len = 0;
    for (let i = 0; i < str.length; ++i) {
        const u = scalarValueAt(str, i);
        if (u <= 0x7F) {
            len += 1;
        } else if (u <= 0x7FF) {
            len += 2;
        } else if (u <= 0xFFFF) {
            len += 3;
        } else {
            len += 4;
            // a supplementary code point occupied two UTF-16 code units
            ++i;
        }
    }
    return len;
}

export default class TextEncoder {
    /**
     * Encode a javascript string as utf-8.
     *
     * Unpaired surrogates are encoded as U+FFFD (EF BF BD), so the output
     * is always well-formed UTF-8.
     *
     * @param {String} str String to encode; defaults to the empty string
     * @return {Uint8Array} UTF-8-encoded output
     */
    encode(str = '') {
        const outU8Array = new Uint8Array(utf8len(str));
        let outIdx = 0;
        for (let i = 0; i < str.length; ++i) {
            const u = scalarValueAt(str, i);
            if (u <= 0x7F) {
                outU8Array[outIdx++] = u;
            } else if (u <= 0x7FF) {
                outU8Array[outIdx++] = 0xC0 | (u >> 6);
                outU8Array[outIdx++] = 0x80 | (u & 63);
            } else if (u <= 0xFFFF) {
                outU8Array[outIdx++] = 0xE0 | (u >> 12);
                outU8Array[outIdx++] = 0x80 | ((u >> 6) & 63);
                outU8Array[outIdx++] = 0x80 | (u & 63);
            } else {
                outU8Array[outIdx++] = 0xF0 | (u >> 18);
                outU8Array[outIdx++] = 0x80 | ((u >> 12) & 63);
                outU8Array[outIdx++] = 0x80 | ((u >> 6) & 63);
                outU8Array[outIdx++] = 0x80 | (u & 63);
                // a supplementary code point occupied two UTF-16 code units
                ++i;
            }
        }
        return outU8Array;
    }
}

View file

@ -0,0 +1,85 @@
/*
Copyright 2017 Vector Creations Ltd
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
"use strict";
import TextDecoderPolyfill from 'utils/TextDecoderPolyfill';
import * as testUtils from '../test-utils';
import expect from 'expect';
// Spec for the TextDecoder polyfill: well-formed input of every sequence
// length, byte-order-mark handling, and replacement of malformed input.
describe('textDecoderPolyfill', function() {
    beforeEach(function() {
        testUtils.beforeEach(this);
    });

    it('should correctly decode a range of strings', function() {
        const decoder = new TextDecoderPolyfill();

        // one-, two-, three- and four-byte sequences respectively
        expect(decoder.decode(Uint8Array.of(65, 66, 67))).toEqual('ABC');
        expect(decoder.decode(Uint8Array.of(0xC3, 0xA6))).toEqual('æ');
        expect(decoder.decode(Uint8Array.of(0xE2, 0x82, 0xAC))).toEqual('€');
        expect(decoder.decode(Uint8Array.of(0xF0, 0x9F, 0x92, 0xA9))).toEqual('\uD83D\uDCA9');
    });

    it('should ignore byte-order marks', function() {
        const decoder = new TextDecoderPolyfill();
        expect(decoder.decode(Uint8Array.of(0xEF, 0xBB, 0xBF, 65)))
            .toEqual('A');
    });

    it('should not ignore byte-order marks in the middle of the array', function() {
        const decoder = new TextDecoderPolyfill();
        expect(decoder.decode(Uint8Array.of(65, 0xEF, 0xBB, 0xBF, 66)))
            .toEqual('A\uFEFFB');
    });

    it('should reject overlong encodings', function() {
        const decoder = new TextDecoderPolyfill();

        // euro, as 4 bytes
        expect(decoder.decode(Uint8Array.of(65, 0xF0, 0x82, 0x82, 0xAC, 67)))
            .toEqual('A\uFFFD\uFFFD\uFFFD\uFFFDC');
    });

    it('should reject 5 and 6-byte encodings', function() {
        const decoder = new TextDecoderPolyfill();
        expect(decoder.decode(Uint8Array.of(65, 0xF8, 0x82, 0x82, 0x82, 0x82, 67)))
            .toEqual('A\uFFFD\uFFFD\uFFFD\uFFFD\uFFFDC');
    });

    // Unicode ends at U+10FFFF; these bytes would encode U+120000.
    it('should reject code points beyond 0x10FFFF', function() {
        const decoder = new TextDecoderPolyfill();
        expect(decoder.decode(Uint8Array.of(0xF4, 0xA0, 0x80, 0x80)))
            .toEqual('\uFFFD\uFFFD\uFFFD\uFFFD');
    });

    it('should cope with end-of-string', function() {
        const decoder = new TextDecoderPolyfill();

        // sequences truncated after one, two and three bytes
        expect(decoder.decode(Uint8Array.of(65, 0xC3)))
            .toEqual('A\uFFFD');
        expect(decoder.decode(Uint8Array.of(65, 0xE2, 0x82)))
            .toEqual('A\uFFFD\uFFFD');
        expect(decoder.decode(Uint8Array.of(65, 0xF0, 0x9F, 0x92)))
            .toEqual('A\uFFFD\uFFFD\uFFFD');
    });
});

View file

@ -0,0 +1,39 @@
/*
Copyright 2017 Vector Creations Ltd
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
"use strict";
import TextEncoderPolyfill from 'utils/TextEncoderPolyfill';
import * as testUtils from '../test-utils';
import expect from 'expect';
// Spec for the TextEncoder polyfill.
describe('textEncoderPolyfill', function() {
    beforeEach(function() {
        testUtils.beforeEach(this);
    });

    it('should correctly encode a range of strings', function() {
        // Pairs of [input string, expected UTF-8 bytes], in increasing
        // order of encoded sequence length (one to four bytes).
        const cases = [
            ['ABC', [65, 66, 67]],
            ['æ', [0xC3, 0xA6]],
            ['€', [0xE2, 0x82, 0xAC]],
            // PILE OF POO (💩)
            ['\uD83D\uDCA9', [0xF0, 0x9F, 0x92, 0xA9]],
        ];

        const encoder = new TextEncoderPolyfill();
        cases.forEach(([input, expectedBytes]) => {
            expect(encoder.encode(input)).toEqual(Uint8Array.of(...expectedBytes));
        });
    });
});