|
import entities from './entities.js'; |
|
// Replacement code points for numeric references 128–159, looked up as
// `windows_1252[code - 128]`. Slots that hold their own position
// (0x81, 0x8d, 0x8f, 0x90, 0x9d) leave the code unchanged.
const windows_1252 = [
	// 128–135
	0x20ac, 0x81, 0x201a, 0x192, 0x201e, 0x2026, 0x2020, 0x2021,
	// 136–143
	0x2c6, 0x2030, 0x160, 0x2039, 0x152, 0x8d, 0x17d, 0x8f,
	// 144–151
	0x90, 0x2018, 0x2019, 0x201c, 0x201d, 0x2022, 0x2013, 0x2014,
	// 152–159
	0x2dc, 0x2122, 0x161, 0x203a, 0x153, 0x9d, 0x17e, 0x178
];
|
|
|
|
|
|
|
|
|
|
|
/**
 * Builds the regex source fragment that matches one named entity.
 *
 * Inside an attribute value, a name written without its trailing `;` gets
 * `\b(?!=)` appended, so it only matches when it ends at a word boundary and
 * is not immediately followed by `=`. Every other name is returned untouched.
 *
 * @param {string} entity_name Entity name as it appears after `&`.
 * @param {boolean} is_attribute_value Whether the pattern is for attribute values.
 * @returns {string} Regex source for this entity name.
 */
function reg_exp_entity(entity_name, is_attribute_value) {
	const needs_boundary_guard = is_attribute_value && !entity_name.endsWith(';');
	return needs_boundary_guard ? `${entity_name}\\b(?!=)` : entity_name;
}
|
|
|
|
|
|
|
|
|
/**
 * Compiles the global regex that finds character references in text.
 *
 * The single capture group holds everything after `&`: either a numeric
 * reference (`#123`, `#x1F`, with an optional `;`) or one of the keys of
 * `entities`.
 *
 * @param {boolean} is_attribute_value Passed through to `reg_exp_entity` for every name.
 * @returns {RegExp} A new `g`-flagged pattern.
 */
function get_entity_pattern(is_attribute_value) {
	const reg_exp_num = '#(?:x[a-fA-F\\d]+|\\d+)(?:;)?';

	// Regex alternation takes the first alternative that matches, not the longest.
	// Putting longer names first stops a short name (e.g. `not`) from shadowing a
	// longer one that starts with it (e.g. `notin;`), regardless of the key order
	// of `entities`, which is defined in another module.
	const reg_exp_entities = Object.keys(entities)
		.sort((a, b) => b.length - a.length)
		.map((entity_name) => reg_exp_entity(entity_name, is_attribute_value));

	return new RegExp(`&(${reg_exp_num}|${reg_exp_entities.join('|')})`, 'g');
}
|
// Both patterns are compiled once, when the module is evaluated: the first for
// ordinary content (`false`), the second for attribute values (`true`).
const [entity_pattern_content, entity_pattern_attr_value] = [false, true].map(
	(is_attribute_value) => get_entity_pattern(is_attribute_value)
);
|
|
|
|
|
|
|
|
|
|
|
/**
 * Replaces the character references in `html` with the characters they denote.
 *
 * Named references are looked up in `entities`; `&#x…` references are parsed
 * as hexadecimal and other `&#…` references as decimal. The resulting code is
 * passed through `validate_code` before being turned into a string. A
 * reference whose code is falsy (for example `&#0;`) is left exactly as written.
 *
 * @param {string} html Text that may contain character references.
 * @param {boolean} is_attribute_value Selects the attribute-value pattern instead of the content pattern.
 * @returns {string} `html` with every decodable reference replaced.
 */
export function decode_character_references(html, is_attribute_value) {
	const pattern = is_attribute_value ? entity_pattern_attr_value : entity_pattern_content;

	// `reference` is the captured text after `&`.
	const to_code = (reference) => {
		if (reference[0] !== '#') {
			return entities[reference];
		}
		const is_hex = reference[1] === 'x';
		return is_hex ? parseInt(reference.substring(2), 16) : parseInt(reference.substring(1), 10);
	};

	return html.replace(pattern, (match, reference) => {
		const code = to_code(reference);
		return code ? String.fromCodePoint(validate_code(code)) : match;
	});
}
|
// Code point returned by `validate_code` for values it refuses to emit.
const NUL = 0;

/**
 * Maps the number from a character reference to the code point to emit.
 *
 * - 10 (line feed) becomes 32 (space).
 * - Other values below 128 are returned unchanged.
 * - 128–159 are remapped through the `windows_1252` table.
 * - UTF-16 surrogate halves (0xD800–0xDFFF) become `NUL`.
 * - Every other value up to 0x10FFFF, the last Unicode code point, is
 *   returned unchanged.
 * - Anything larger, or `NaN`, becomes `NUL`.
 *
 * The result is always acceptable to `String.fromCodePoint` for the
 * non-negative integers that `parseInt` produces.
 *
 * @param {number} code Numeric value of the reference.
 * @returns {number} Code point to emit.
 */
function validate_code(code) {
	// line feed becomes a plain space
	if (code === 10) {
		return 32;
	}

	// ASCII range
	if (code < 128) {
		return code;
	}

	// 128–159: substitute the entry from the windows_1252 table
	if (code <= 159) {
		return windows_1252[code - 128];
	}

	// rest of the basic multilingual plane below the surrogates
	if (code < 0xd800) {
		return code;
	}

	// UTF-16 surrogate halves are not characters on their own
	if (code <= 0xdfff) {
		return NUL;
	}

	// Everything else up to the end of the Unicode code space. This used to stop
	// at 0x2FFFF, which turned valid code points in plane 3 and above into NUL.
	if (code <= 0x10ffff) {
		return code;
	}

	return NUL;
}
|
|
|
// Keyed by the tag name of an open element. Each value is the set of tag names
// for which `closing_tag_omitted` reports that the open element's closing tag
// was omitted when that tag comes next.
const disallowed_contents = new Map(
	Object.entries({
		li: ['li'],
		dt: ['dt', 'dd'],
		dd: ['dt', 'dd'],
		p: 'address article aside blockquote div dl fieldset footer form h1 h2 h3 h4 h5 h6 header hgroup hr main menu nav ol p pre section table ul'.split(
			' '
		),
		rt: ['rt', 'rp'],
		rp: ['rt', 'rp'],
		optgroup: ['optgroup'],
		option: ['option', 'optgroup'],
		thead: ['tbody', 'tfoot'],
		tbody: ['tbody', 'tfoot'],
		tfoot: ['tbody'],
		tr: ['tr', 'tbody'],
		td: ['td', 'th', 'tr'],
		th: ['td', 'th', 'tr']
	}).map(([tag, next_tags]) => [tag, new Set(next_tags)])
);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Reports whether the closing tag of `current` counts as omitted given the
 * tag that comes next.
 *
 * Returns `true` only when `current` has an entry in `disallowed_contents`
 * and either `next` is falsy or `next` is in that entry's set.
 *
 * @param {string} current Tag name of the open element.
 * @param {string} [next] Tag name that follows, if any.
 * @returns {boolean}
 */
export function closing_tag_omitted(current, next) {
	const next_tags = disallowed_contents.get(current);
	if (next_tags === undefined) {
		return false;
	}
	return !next || next_tags.has(next);
}
|
|