|
| 1 | +'use strict'; |
| 2 | + |
| 3 | +const { |
| 4 | + RegExpPrototypeExec, |
| 5 | + RegExpPrototypeSymbolReplace, |
| 6 | + StringFromCharCodeApply, |
| 7 | + StringPrototypeCharCodeAt, |
| 8 | + StringPrototypeIndexOf, |
| 9 | + StringPrototypeSlice, |
| 10 | + TypedArrayPrototypeSubarray, |
| 11 | + Uint8Array, |
| 12 | +} = primordials; |
| 13 | + |
| 14 | +const assert = require('internal/assert'); |
| 15 | +const { Buffer } = require('buffer'); |
| 16 | +const { MIMEType } = require('internal/mime'); |
| 17 | + |
// Module-wide TextEncoder instance; stays undefined until lazyEncoder() runs.
let encoder;

/**
 * Returns the shared TextEncoder, loading `internal/encoding` and creating
 * the instance on the first call only.
 * @returns {TextEncoder}
 */
function lazyEncoder() {
  if (encoder !== undefined) {
    return encoder;
  }

  const { TextEncoder } = require('internal/encoding');
  encoder = new TextEncoder();
  return encoder;
}
| 27 | + |
// Global matcher for TAB, LF, FF, CR and SPACE; used to delete every such
// character from a string.
const ASCII_WHITESPACE_REPLACE_REGEX = /[\u0009\u000A\u000C\u000D\u0020]/g; // eslint-disable-line
| 29 | + |
// https://fetch.spec.whatwg.org/#data-url-processor
/**
 * Runs the Fetch "data: URL processor" steps on a parsed URL, splitting it
 * into a MIME type record and a decoded body.
 * @param {URL} dataURL URL whose `protocol` is asserted to be `data:`.
 * @returns {'failure' | { mimeType: MIMEType, body: Uint8Array }}
 *   The string `'failure'` when there is no comma separator or the base64
 *   payload is invalid; otherwise the data: URL struct.
 */
function dataURLProcessor(dataURL) {
  // Step 1: only data: URLs may be passed in.
  assert(dataURL.protocol === 'data:');

  // Steps 2-3: serialize without the fragment, then drop the leading "data:".
  const input = StringPrototypeSlice(URLSerializer(dataURL, true), 5);

  // Steps 4-5: collect everything up to (not including) the first U+002C (,).
  const cursor = { position: 0 };
  const rawMimeType = collectASequenceOfCodePointsFast(',', input, cursor);

  // Step 6: strip leading and trailing ASCII whitespace. The unstripped
  // `rawMimeType` is kept because step 9 must slice by its original length.
  let mimeType = removeASCIIWhitespace(rawMimeType, true, true);

  // Step 7: the cursor reaching the end of input means no comma was found.
  if (cursor.position >= input.length) {
    return 'failure';
  }

  // Step 8: move past the comma.
  cursor.position += 1;

  // Steps 9-10: the remainder of input is the body; percent-decode it.
  const encodedBody = StringPrototypeSlice(input, rawMimeType.length + 1);
  let body = stringPercentDecode(encodedBody);

  // Step 11: mimeType ends with ";", optional U+0020 SPACEs and a
  // case-insensitive "base64".
  const base64Match = RegExpPrototypeExec(/;(\u0020){0,}base64$/i, mimeType);
  if (base64Match !== null) {
    // 11.1-11.2: isomorphic decode, then forgiving-base64 decode.
    body = forgivingBase64(isomorphicDecode(body));

    // 11.3: propagate a decoding failure.
    if (body === 'failure') {
      return 'failure';
    }

    // 11.4: drop the trailing "base64" (6 code points).
    mimeType = StringPrototypeSlice(mimeType, 0, -6);

    // 11.5: drop any trailing U+0020 SPACE code points.
    mimeType = RegExpPrototypeSymbolReplace(/(\u0020)+$/, mimeType, '');

    // 11.6: drop the final U+003B (;).
    mimeType = StringPrototypeSlice(mimeType, 0, -1);
  }

  // Step 12: a parameters-only value gets "text/plain" prepended.
  if (mimeType[0] === ';') {
    mimeType = `text/plain${mimeType}`;
  }

  // Steps 13-14: parse mimeType; if the constructor throws, fall back to
  // text/plain;charset=US-ASCII.
  let mimeTypeRecord;
  try {
    mimeTypeRecord = new MIMEType(mimeType);
  } catch {
    mimeTypeRecord = new MIMEType('text/plain;charset=US-ASCII');
  }

  // Step 15: https://fetch.spec.whatwg.org/#data-url-struct
  return { mimeType: mimeTypeRecord, body };
}
| 130 | + |
// https://url.spec.whatwg.org/#concept-url-serializer
/**
 * Returns `url.href`, optionally with the fragment removed.
 * @param {URL} url
 * @param {boolean} excludeFragment When true, the `#...` part is cut off.
 * @returns {string}
 */
function URLSerializer(url, excludeFragment = false) {
  const { href } = url;

  if (!excludeFragment) {
    return href;
  }

  const fragmentLength = url.hash.length;

  if (fragmentLength !== 0) {
    // `hash` includes the leading '#', so trimming its length removes both.
    return StringPrototypeSlice(href, 0, href.length - fragmentLength);
  }

  // `hash` is empty but href can still end in a bare '#'; drop it as well.
  return href[href.length - 1] === '#' ? StringPrototypeSlice(href, 0, -1) : href;
}
| 152 | + |
/**
 * Single-character variant of "collect a sequence of code points": returns
 * the text from `position.position` up to the next occurrence of `char`
 * (or to the end of `input` when there is none) and moves `position` there.
 * @param {string} char
 * @param {string} input
 * @param {{ position: number }} position Updated in place.
 * @returns {string}
 */
function collectASequenceOfCodePointsFast(char, input, position) {
  const from = position.position;
  const found = StringPrototypeIndexOf(input, char, from);

  // No match: consume the rest of the input.
  position.position = found === -1 ? input.length : found;

  return StringPrototypeSlice(input, from, position.position);
}
| 171 | + |
// https://url.spec.whatwg.org/#string-percent-decode
/**
 * Encodes `input` with the shared encoder from lazyEncoder() and
 * percent-decodes the resulting bytes.
 * @param {string} input
 * @returns {Uint8Array}
 */
function stringPercentDecode(input) {
  // Step 1 (encode to bytes) feeds directly into step 2 (percent-decode).
  return percentDecode(lazyEncoder().encode(input));
}
| 181 | + |
/**
 * Tells whether `byte` is the ASCII code of a hexadecimal digit.
 * @param {number} byte
 * @returns {boolean}
 */
function isHexCharByte(byte) {
  // '0'..'9'
  if (byte >= 0x30 && byte <= 0x39) {
    return true;
  }
  // 'A'..'F'
  if (byte >= 0x41 && byte <= 0x46) {
    return true;
  }
  // 'a'..'f'
  return byte >= 0x61 && byte <= 0x66;
}
| 189 | + |
/**
 * Converts the ASCII code of a hexadecimal digit to its numeric value.
 * The caller is expected to have validated `byte` (see isHexCharByte).
 * @param {number} byte
 * @returns {number}
 */
function hexByteToNumber(byte) {
  const isDecimalDigit = byte >= 0x30 && byte <= 0x39;

  if (isDecimalDigit) {
    // '0' is 0x30.
    return byte - 0x30;
  }

  // Clearing bit 0x20 folds lowercase onto uppercase; 'A' (0x41) - 0x37 = 10.
  return (byte & 0xDF) - 0x37;
}
| 203 | + |
// https://url.spec.whatwg.org/#percent-decode
/**
 * Percent-decodes a byte sequence: every `%` followed by two hex digits is
 * replaced by the byte those digits encode; all other bytes are copied.
 * @param {Uint8Array} input
 * @returns {Uint8Array} A new array, or a subarray of it when the output is
 *   shorter than the input.
 */
function percentDecode(input) {
  const total = input.length;
  // The output can never be longer than the input.
  const decoded = new Uint8Array(total);
  let written = 0;
  let read = 0;

  while (read < total) {
    const current = input[read];
    // Reading past the end yields undefined, which is not a hex digit, so a
    // truncated escape falls through to the plain-copy branch.
    const isEscape = current === 0x25 &&
      isHexCharByte(input[read + 1]) &&
      isHexCharByte(input[read + 2]);

    if (isEscape) {
      // Combine the two hex digits into a single byte and skip over them.
      decoded[written++] =
        (hexByteToNumber(input[read + 1]) << 4) | hexByteToNumber(input[read + 2]);
      read += 3;
    } else {
      // Ordinary byte, or a '%' that does not start a valid escape.
      decoded[written++] = current;
      read += 1;
    }
  }

  return written === total ? decoded : TypedArrayPrototypeSubarray(decoded, 0, written);
}
| 246 | + |
// https://infra.spec.whatwg.org/#forgiving-base64-decode
/**
 * Forgiving-base64 decodes `data`.
 * @param {string} data
 * @returns {Uint8Array | 'failure'} The decoded bytes, or the string
 *   `'failure'` when the input is not acceptable base64.
 */
function forgivingBase64(data) {
  // Step 1: ASCII whitespace is ignored entirely.
  const compact = RegExpPrototypeSymbolReplace(ASCII_WHITESPACE_REPLACE_REGEX, data, '');

  // Step 2: when the length is a multiple of 4, one or two trailing '='
  // are treated as padding and excluded from the checks below.
  let payloadLength = compact.length;
  if (payloadLength % 4 === 0 && compact[payloadLength - 1] === '=') {
    payloadLength -= compact[payloadLength - 2] === '=' ? 2 : 1;
  }

  // Step 3: a remainder of 1 can never be valid.
  if (payloadLength % 4 === 1) {
    return 'failure';
  }

  // Step 4: apart from the padding, only '+', '/' and ASCII alphanumerics
  // are allowed.
  const payload = payloadLength === compact.length ?
    compact :
    StringPrototypeSlice(compact, 0, payloadLength);
  if (RegExpPrototypeExec(/[^+/0-9A-Za-z]/, payload) !== null) {
    return 'failure';
  }

  // Decode the whitespace-free string (padding included) and expose the
  // result as a Uint8Array over the same memory.
  const buffer = Buffer.from(compact, 'base64');
  return new Uint8Array(buffer.buffer, buffer.byteOffset, buffer.byteLength);
}
| 285 | + |
/**
 * Tells whether a char code is ASCII whitespace: TAB, LF, FF, CR or SPACE.
 * @see https://infra.spec.whatwg.org/#ascii-whitespace
 * @param {number} char
 * @returns {boolean}
 */
function isASCIIWhitespace(char) {
  switch (char) {
    case 0x009: // \t
    case 0x00a: // \n
    case 0x00c: // \f
    case 0x00d: // \r
    case 0x020: // space
      return true;
    default:
      return false;
  }
}
| 294 | + |
/**
 * Strips ASCII whitespace from the selected end(s) of `str` by delegating
 * to removeChars with isASCIIWhitespace as the predicate.
 * @see https://infra.spec.whatwg.org/#strip-leading-and-trailing-ascii-whitespace
 * @param {string} str
 * @param {boolean} [leading=true] Strip from the start.
 * @param {boolean} [trailing=true] Strip from the end.
 * @returns {string}
 */
function removeASCIIWhitespace(str, leading = true, trailing = true) {
  const whitespacePredicate = isASCIIWhitespace;
  return removeChars(str, leading, trailing, whitespacePredicate);
}
| 304 | + |
/**
 * Removes characters matching `predicate` from the start and/or end of `str`.
 * @param {string} str
 * @param {boolean} leading Strip matching characters from the start.
 * @param {boolean} trailing Strip matching characters from the end.
 * @param {(charCode: number) => boolean} predicate Receives a UTF-16 char
 *   code and returns true when that character should be removed.
 * @returns {string} `str` itself when nothing was removed, otherwise the
 *   trimmed slice.
 */
function removeChars(str, leading, trailing, predicate) {
  let lead = 0;
  let trail = str.length - 1;

  if (leading) {
    while (lead < str.length && predicate(StringPrototypeCharCodeAt(str, lead))) lead++;
  }

  if (trailing) {
    // Scan down to `lead` (inclusive) so index 0 is also examined when
    // only trailing removal is requested; a `trail > 0` bound would leave
    // a matching first character behind. Stopping at `lead` also avoids
    // re-testing characters the leading pass already consumed.
    while (trail >= lead && predicate(StringPrototypeCharCodeAt(str, trail))) trail--;
  }

  return lead === 0 && trail === str.length - 1 ? str : StringPrototypeSlice(str, lead, trail + 1);
}
| 325 | + |
/**
 * Isomorphic decode: builds a string with one code unit per input byte,
 * each having the same numeric value as that byte, in the same order.
 * @see https://infra.spec.whatwg.org/#isomorphic-decode
 * @param {Uint8Array} input
 * @returns {string}
 */
function isomorphicDecode(input) {
  // 65535 bytes per StringFromCharCodeApply call — presumably to keep the
  // number of arguments spread into a single call bounded.
  const chunkSize = (2 << 15) - 1;
  const total = input.length;

  // Small inputs are converted in a single call.
  if (total < chunkSize) {
    return StringFromCharCodeApply(input);
  }

  let text = '';
  for (let offset = 0; offset < total; offset += chunkSize) {
    // The final chunk may be shorter than chunkSize.
    const end = offset + chunkSize > total ? total : offset + chunkSize;
    text += StringFromCharCodeApply(TypedArrayPrototypeSubarray(input, offset, end));
  }
  return text;
}
| 349 | + |
| 350 | +module.exports = { |
| 351 | + dataURLProcessor, |
| 352 | +}; |
0 commit comments