diff --git a/lib/internal/encoding.js b/lib/internal/encoding.js index b2ca3c612bf6ef..4f23752b6197c9 100644 --- a/lib/internal/encoding.js +++ b/lib/internal/encoding.js @@ -405,61 +405,75 @@ function makeTextDecoderICU() { validateObject(options, 'options', kValidateObjectAllowObjectsAndNull); const enc = getEncodingFromLabel(encoding); - if (enc === undefined) + if (enc === undefined || !hasConverter(enc)) throw new ERR_ENCODING_NOT_SUPPORTED(encoding); let flags = 0; if (options !== null) { - flags |= options.fatal ? CONVERTER_FLAGS_FATAL : 0; + if (options.fatal) { + throw new ERR_NO_ICU('"fatal" option'); + } flags |= options.ignoreBOM ? CONVERTER_FLAGS_IGNORE_BOM : 0; } this[kDecoder] = true; + // StringDecoder will normalize WHATWG encoding to Node.js encoding. + this[kHandle] = new (lazyStringDecoder())(enc); this[kFlags] = flags; this[kEncoding] = enc; - this[kIgnoreBOM] = Boolean(options?.ignoreBOM); - this[kFatal] = Boolean(options?.fatal); - // Only support fast path for UTF-8. 
- this[kUTF8FastPath] = enc === 'utf-8'; - this[kLatin1FastPath] = enc === 'windows-1252'; - this[kHandle] = undefined; - - if (!this[kUTF8FastPath] && !this[kLatin1FastPath]) { - this.#prepareConverter(); - } - } - - #prepareConverter() { - if (this[kHandle] !== undefined) return; - const handle = getConverter(this[kEncoding], this[kFlags]); - if (handle === undefined) - throw new ERR_ENCODING_NOT_SUPPORTED(this[kEncoding]); - this[kHandle] = handle; + this[kBOMSeen] = false; } decode(input = empty, options = kEmptyObject) { validateDecoder(this); + + if (isAnyArrayBuffer(input)) { + try { + input = Buffer.from(input); + } catch { + input = empty; + } + } else if (isArrayBufferView(input)) { + try { + input = Buffer.from(input.buffer, input.byteOffset, + input.byteLength); + } catch { + input = empty; + } + } else { + throw new ERR_INVALID_ARG_TYPE('input', + ['ArrayBuffer', 'ArrayBufferView'], + input); + } - this[kUTF8FastPath] &&= !(options?.stream); - this[kLatin1FastPath] &&= !(options?.stream); + validateObject(options, 'options', kValidateObjectAllowObjectsAndNull); - if (this[kUTF8FastPath]) { - return decodeUTF8(input, this[kIgnoreBOM], this[kFatal]); + if (this[kFlags] & CONVERTER_FLAGS_FLUSH) { + this[kBOMSeen] = false; } - if (this[kLatin1FastPath]) { - return decodeLatin1(input, this[kIgnoreBOM], this[kFatal]); + if (options !== null && options.stream) { + this[kFlags] &= ~CONVERTER_FLAGS_FLUSH; + } else { + this[kFlags] |= CONVERTER_FLAGS_FLUSH; } - this.#prepareConverter(); - - validateObject(options, 'options', kValidateObjectAllowObjectsAndNull); + let result = this[kFlags] & CONVERTER_FLAGS_FLUSH ? + this[kHandle].end(input) : + this[kHandle].write(input); - let flags = 0; - if (options !== null) - flags |= options.stream ? 
0 : CONVERTER_FLAGS_FLUSH; + if (result.length > 0 && + !this[kBOMSeen] && + !(this[kFlags] & CONVERTER_FLAGS_IGNORE_BOM)) { + // If the very first result in the stream is a BOM, and we are not + // explicitly told to ignore it, then we discard it. + if (result[0] === '\ufeff') { + result = StringPrototypeSlice(result, 1); + } + this[kBOMSeen] = true; + } - return _decode(this[kHandle], input, flags, this.encoding); + return result; } } @@ -555,7 +569,6 @@ function makeTextDecoderJS() { return result; } } - return TextDecoder; } diff --git a/test/parallel/test-whatwg-encoding-custom-textdecoder.js b/test/parallel/test-whatwg-encoding-custom-textdecoder.js index 7582da52a8b628..ad015005f7f709 100644 --- a/test/parallel/test-whatwg-encoding-custom-textdecoder.js +++ b/test/parallel/test-whatwg-encoding-custom-textdecoder.js @@ -233,3 +233,14 @@ if (common.hasIntl) { const decoder = new TextDecoder(); assert.strictEqual(decoder.decode(buffer), ''); } + +// Big5: an invalid byte sequence (0x83 0x5C) should decode to U+FFFD followed by U+005C. +{ + const decoder = new TextDecoder('Big5'); + const input = new Uint8Array([0x83, 0x5C]); + const output = decoder.decode(input); + + assert.strictEqual(output.length, 2, 'Big5 error sequence should decode to 2 characters'); + assert.strictEqual(output.charCodeAt(0).toString(16), 'fffd', 'The first character should be U+FFFD'); + assert.strictEqual(output.charCodeAt(1).toString(16), '5c', 'The second character should be U+005C'); +} \ No newline at end of file