fix: unicode support in regex

This commit is contained in:
2025-06-27 14:05:12 +03:00
parent bca37e690f
commit ae1cf48c53

29
main.js
View File

@@ -60,29 +60,38 @@ function processNodes() {
} }
} }
if (activeWords.length > 0) { if (activeWords.length > 0) {
const wordMap = new Map(); const wordMap = new Map();
for (const word of activeWords) wordMap.set(word.text.toLowerCase(), word); for (const word of activeWords) {
wordMap.set(matchCase ? word.text : word.text.toLowerCase(), word);
}
let flags = matchCase ? 'g' : 'gi'; let flags = matchCase ? 'gu' : 'giu';
let wordsPattern = Array.from(wordMap.keys()).map(escapeRegex).join('|'); let wordsPattern = Array.from(wordMap.keys()).map(escapeRegex).join('|');
if (matchWhole) {
wordsPattern = `\\b(?:${wordsPattern})\\b`; if (matchWhole) {
} wordsPattern = `(?:(?<!\\p{L})|^)(${wordsPattern})(?:(?!\\p{L})|$)`;
}
try {
const pattern = new RegExp(`(${wordsPattern})`, flags); const pattern = new RegExp(`(${wordsPattern})`, flags);
for (const node of textNodes) { for (const node of textNodes) {
if (!pattern.test(node.nodeValue)) continue; if (!node.nodeValue || !pattern.test(node.nodeValue)) continue;
const span = document.createElement('span'); const span = document.createElement('span');
span.innerHTML = node.nodeValue.replace(pattern, match => { span.innerHTML = node.nodeValue.replace(pattern, match => {
const word = wordMap.get(match.toLowerCase()) || { background: '#ffff00', foreground: '#000000' }; const lookup = matchCase ? match : match.toLowerCase();
const word = wordMap.get(lookup) || { background: '#ffff00', foreground: '#000000' };
return `<mark data-gh style="background:${word.background};color:${word.foreground};padding:0 2px;">${match}</mark>`; return `<mark data-gh style="background:${word.background};color:${word.foreground};padding:0 2px;">${match}</mark>`;
}); });
node.parentNode.replaceChild(span, node); node.parentNode.replaceChild(span, node);
} }
} catch (e) {
console.error("Regex error:", e);
} }
}
observer.observe(document.body, { observer.observe(document.body, {
childList: true, childList: true,