2021-04-30 20:47:39 +00:00
|
|
|
/**
 * Strips diacritics (accents) from a string.
 *
 * The input is first put into Unicode NFD form, which separates base
 * characters from their combining marks; every combining mark in the
 * U+0300–U+036F range is then deleted.
 *
 * @param {string} str - The text to process.
 * @returns {string} The NFD-decomposed text without combining diacritical marks.
 */
export function normalizeUnicodeString (str) {
  const combiningDiacritics = /[\u0300-\u036f]/g;
  const decomposed = str.normalize('NFD');

  return decomposed.replace(combiningDiacritics, '');
}
|
|
|
|
|
|
2017-08-02 19:43:22 +00:00
|
|
|
/**
 * Replaces common punctuation characters with spaces.
 *
 * Each occurrence of one of . , / # ! @ $ % ^ & ; : { } = - _ ` ~ ( ) is
 * swapped for a single space. Nothing is deleted and adjacent spaces are not
 * collapsed, so the result has the same length as the input.
 *
 * @param {string} str - The text to process.
 * @returns {string} The text with every listed punctuation character turned into a space.
 */
export function removePunctuationFromString (str) {
  const punctuation = /[.,/#!@$%^&;:{}=\-_`~()]/g;
  const blank = ' ';

  return str.replace(punctuation, blank);
}
|
|
|
|
|
|
2020-04-28 14:47:52 +00:00
|
|
|
// NOTE: the entries of wordsToMatch are deliberately not regex-escaped, so that
// they may contain regular expressions; this method should therefore not be
// used if wordsToMatch contains unsanitized user input
|
2020-11-09 10:34:28 +00:00
|
|
|
|
2017-08-02 19:43:22 +00:00
|
|
|
/**
 * Finds which of the given words occur in a string.
 *
 * Both the string and every word are passed through normalizeUnicodeString
 * and removePunctuationFromString before comparison. Each word is then turned
 * into a case-insensitive regular expression that allows an optional run of
 * non-letter characters on either side of the word, anchored by word
 * boundaries. Only the first occurrence of each word is considered.
 *
 * The words are interpolated into the regular expression without escaping, so
 * they must not come from unsanitized user input.
 *
 * @param {string} str - The text to search in.
 * @param {string[]} wordsToMatch - Words (or regular-expression fragments) to look for.
 * @returns {string[]} One entry per word that matched, in the order of
 *   wordsToMatch: the matched portion of the normalized text, trimmed of
 *   surrounding whitespace. Words without a match contribute nothing.
 */
export function getMatchesByWordArray (str, wordsToMatch) {
  const wordRegexs = wordsToMatch.map(word => {
    const normalizedWord = removePunctuationFromString(normalizeUnicodeString(word));
    return new RegExp(`\\b([^a-z]+)?${normalizedWord}([^a-z]+)?\\b`, 'i');
  });

  // With no words there is nothing to search for; return before touching str,
  // exactly as a per-word loop over an empty list would.
  if (wordRegexs.length === 0) {
    return [];
  }

  // remove accented characters from the string, which would trip up the regEx
  // later on, by using the built-in Unicode normalisation methods
  // https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/normalize
  // https://www.unicode.org/reports/tr15/#Canon_Compat_Equivalence
  // https://unicode-table.com/en/#combining-diacritical-marks
  //
  // This does not depend on the word being tested, so it is done once here
  // rather than once per word.
  const searchableText = removePunctuationFromString(normalizeUnicodeString(str));

  const matchedWords = [];
  for (const regEx of wordRegexs) {
    const match = searchableText.match(regEx);
    // A non-null result from a non-global match always carries the matched
    // text at index 0, so no further null check is needed.
    if (match !== null) {
      matchedWords.push(match[0].trim());
    }
  }
  return matchedWords;
}
|