export const xmlReplacer: RegExp = /["$&'<>\u0080-\uFFFF]/g; const xmlCodeMap = new Map([ [34, """], [38, "&"], [39, "'"], [60, "<"], [62, ">"], ]); // For compatibility with node < 4, we wrap `codePointAt` export const getCodePoint: (c: string, index: number) => number = // eslint-disable-next-line @typescript-eslint/no-unnecessary-condition String.prototype.codePointAt == null ? (c: string, index: number): number => (c.charCodeAt(index) & 0xfc_00) === 0xd8_00 ? (c.charCodeAt(index) - 0xd8_00) * 0x4_00 + c.charCodeAt(index + 1) - 0xdc_00 + 0x1_00_00 : c.charCodeAt(index) : // http://mathiasbynens.be/notes/javascript-encoding#surrogate-formulae (input: string, index: number): number => input.codePointAt(index)!; /** * Encodes all non-ASCII characters, as well as characters not valid in XML * documents using XML entities. * * If a character has no equivalent entity, a * numeric hexadecimal reference (eg. `ü`) will be used. */ export function encodeXML(input: string): string { let returnValue = ""; let lastIndex = 0; let match; while ((match = xmlReplacer.exec(input)) !== null) { const { index } = match; const char = input.charCodeAt(index); const next = xmlCodeMap.get(char); if (next === undefined) { returnValue += `${input.substring(lastIndex, index)}&#x${getCodePoint( input, index, ).toString(16)};`; // Increase by 1 if we have a surrogate pair lastIndex = xmlReplacer.lastIndex += Number( (char & 0xfc_00) === 0xd8_00, ); } else { returnValue += input.substring(lastIndex, index) + next; lastIndex = index + 1; } } return returnValue + input.substr(lastIndex); } /** * Encodes all non-ASCII characters, as well as characters not valid in XML * documents using numeric hexadecimal reference (eg. `ü`). * * Have a look at `escapeUTF8` if you want a more concise output at the expense * of reduced transportability. * * @param data String to escape. */ export const escape: typeof encodeXML = encodeXML; /** * Creates a function that escapes all characters matched by the given regular * expression using the given map of characters to escape to their entities. * * @param regex Regular expression to match characters to escape. * @param map Map of characters to escape to their entities. * * @returns Function that escapes all characters matched by the given regular * expression using the given map of characters to escape to their entities. */ function getEscaper( regex: RegExp, map: Map, ): (data: string) => string { return function escape(data: string): string { let match; let lastIndex = 0; let result = ""; while ((match = regex.exec(data))) { if (lastIndex !== match.index) { result += data.substring(lastIndex, match.index); } // We know that this character will be in the map. result += map.get(match[0].charCodeAt(0))!; // Every match will be of length 1 lastIndex = match.index + 1; } return result + data.substring(lastIndex); }; } /** * Encodes all characters not valid in XML documents using XML entities. * * Note that the output will be character-set dependent. * * @param data String to escape. */ export const escapeUTF8: (data: string) => string = /* #__PURE__ */ getEscaper( /["&'<>]/g, xmlCodeMap, ); /** * Encodes all characters that have to be escaped in HTML attributes, * following {@link https://html.spec.whatwg.org/multipage/parsing.html#escapingString}. * * @param data String to escape. */ export const escapeAttribute: (data: string) => string = /* #__PURE__ */ getEscaper( /["&\u00A0]/g, new Map([ [34, """], [38, "&"], [160, " "], ]), ); /** * Encodes all characters that have to be escaped in HTML text, * following {@link https://html.spec.whatwg.org/multipage/parsing.html#escapingString}. * * @param data String to escape. */ export const escapeText: (data: string) => string = /* #__PURE__ */ getEscaper( /[&<>\u00A0]/g, new Map([ [38, "&"], [60, "<"], [62, ">"], [160, " "], ]), );