habitica/website/server/libs/stringUtils.js

/**
 * Strips diacritics from a string.
 *
 * The string is first decomposed to Unicode NFD form, so that an accented
 * character becomes its base character followed by combining marks, and then
 * every code point in the U+0300–U+036F range (combining diacritical marks)
 * is removed.
 *
 * @param {string} str - the text to normalize
 * @returns {string} the text with combining diacritical marks removed
 */
export function normalizeUnicodeString (str) {
  const combiningMarks = /[\u0300-\u036f]/g;
  const decomposed = str.normalize('NFD');
  return decomposed.replace(combiningMarks, '');
}

/**
 * Replaces each punctuation character in a string with a single space.
 *
 * Only the characters listed in the pattern below are treated as punctuation;
 * every occurrence is swapped for one space, so the result has the same
 * length as the input.
 *
 * @param {string} str - the text to clean
 * @returns {string} the text with the listed punctuation turned into spaces
 */
export function removePunctuationFromString (str) {
  const punctuation = /[.,/#!@$%^&;:{}=\-_`~()]/g;
  const replacement = ' ';
  return str.replace(punctuation, replacement);
}

// NOTE: the wordsToMatch aren't escaped in order to support regular expressions,
// so this method should not be used if wordsToMatch contains unsanitized user input

/**
 * Finds which of the given words occur in a string as whole words.
 *
 * Both the string and every word are stripped of diacritics and have their
 * punctuation replaced by spaces before matching. Matching is
 * case-insensitive. Each word is interpolated into a regular expression
 * without escaping, so a word may itself contain regular-expression syntax.
 *
 * @param {string} str - the text to search
 * @param {string[]} wordsToMatch - words (or regular-expression fragments) to look for
 * @returns {string[]} the first match found for each matching word, trimmed,
 *   in the order of `wordsToMatch`; a match can include non-letter characters
 *   (such as digits) directly adjacent to the word
 */
export function getMatchesByWordArray (str, wordsToMatch) {
  // remove accented characters from the string, which would trip up the regEx
  // later on, by using the built-in Unicode normalisation methods
  // https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/normalize
  // https://www.unicode.org/reports/tr15/#Canon_Compat_Equivalence
  // https://unicode-table.com/en/#combining-diacritical-marks

  const wordRegexs = wordsToMatch.map(word => {
    const normalizedWord = removePunctuationFromString(normalizeUnicodeString(word));
    // the optional [^a-z]+ groups let the word be surrounded by non-letters
    // while the \b anchors stop it from matching inside a longer word
    return new RegExp(`\\b([^a-z]+)?${normalizedWord}([^a-z]+)?\\b`, 'i');
  });

  // nothing to look for: return before touching str at all
  if (wordRegexs.length === 0) {
    return [];
  }

  // the cleaned-up string is the same for every word, so compute it only once
  const normalizedStr = removePunctuationFromString(normalizeUnicodeString(str));

  const matchedWords = [];
  wordRegexs.forEach(regEx => {
    const match = normalizedStr.match(regEx);
    // a non-null match always carries the full matched text at index 0
    if (match !== null) {
      matchedWords.push(match[0].trim());
    }
  });
  return matchedWords;
}
Add checks for profanity to profile updates (#12445) * fix(profile): detect attempt to use banned words as display name. refactor profanity detection method. * fix(profile): detect attempt to use banned words in blurb. further refactor profanity detection. inform the user their chat privileges have been revoked. * refactor: add function to normalize Unicode strings and remove diacritics * fix: improve regEx to prevent false partial matches e.g. 'hello' being recognised as banned words. porting fix from #12309 * fix(profile): refactor of profanity detection for #12445 * fix(profile): add test for swear words in new profile. fix existing tests * fix(profile): show different error message for attempted slur use in username by new users. * fix(profile): remove incorrect slur test * fix(profile): fix slurs not caught at start of end of strings connect by punctuation * tests(profile): fix tests for profanity checking * remove exclusive test * 11865 - update text for slur warnings * 11865 - remove unused string from locale files * 11865 - improve naming of banned word usage locale string * 11865 - improve logic so that differentiated warnings are shown depending on whether a slur or other profanity has been used in a display name * 11865 - construct slur regexes outside the validation function in which they are used * 11865 - fix tests 2021-04-30 20:47:39 +00:00			`export function normalizeUnicodeString (str) {`
			`return str.normalize('NFD').replace(/[\u0300-\u036f]/g, '');`
			`}`

lists banned words in the chat error message - fixes https://github.com/HabitRPG/habitica/issues/8812 (#8858) * issue 8812 - added the list of bad words matched to the postChat error message. * issue 8812 - added the list of bad words matched to the postChat error message. * issue 8812 - some refactoring, fixed relevant tests, and lint rules refactor * small fix for unnecessary empty array * added test and did some small refactoring * lint error fix * issue 8812 - added the list of bad words matched to the postChat error message. * issue 8812 - some refactoring, fixed relevant tests, and lint rules refactor * small fix for unnecessary empty array * added test and did some small refactoring * lint error fix * add test to check the error message contains the banned words used * improve banned words test * issue 8812 - added the list of bad words matched to the postChat error message. * issue 8812 - some refactoring, fixed relevant tests, and lint rules refactor * small fix for unnecessary empty array * added test and did some small refactoring * lint error fix * issue 8812 - added the list of bad words matched to the postChat error message. * issue 8812 - some refactoring, fixed relevant tests, and lint rules refactor * add test to check the error message contains the banned words used * improve banned words test * merge with develop - aligned banned slurs check with banned words check 2017-08-02 19:43:22 +00:00			`export function removePunctuationFromString (str) {`
fix linting for server (except for length of apidoc) 2019-10-10 18:11:50 +00:00			return str.replace(/[.,/#!@$%^&;:{}=\-_`~()]/g, ' ');
lists banned words in the chat error message - fixes https://github.com/HabitRPG/habitica/issues/8812 (#8858) * issue 8812 - added the list of bad words matched to the postChat error message. * issue 8812 - added the list of bad words matched to the postChat error message. * issue 8812 - some refactoring, fixed relevant tests, and lint rules refactor * small fix for unnecessary empty array * added test and did some small refactoring * lint error fix * issue 8812 - added the list of bad words matched to the postChat error message. * issue 8812 - some refactoring, fixed relevant tests, and lint rules refactor * small fix for unnecessary empty array * added test and did some small refactoring * lint error fix * add test to check the error message contains the banned words used * improve banned words test * issue 8812 - added the list of bad words matched to the postChat error message. * issue 8812 - some refactoring, fixed relevant tests, and lint rules refactor * small fix for unnecessary empty array * added test and did some small refactoring * lint error fix * issue 8812 - added the list of bad words matched to the postChat error message. * issue 8812 - some refactoring, fixed relevant tests, and lint rules refactor * add test to check the error message contains the banned words used * improve banned words test * merge with develop - aligned banned slurs check with banned words check 2017-08-02 19:43:22 +00:00			`}`

fix(string utils): do not escape possible regular expressions 2020-04-28 14:47:52 +00:00			`// NOTE: the wordsToMatch aren't escaped in order to support regular expressions,`
			`// so this method should not be used if wordsToMatch contains unsanitized user input`
Revert "Revert "fix(banned words): fix partial matching of words containing diacritic… (#12444)"" This reverts commit 5362058f3552d2c410638372fd48bb9a91861d2b. 2020-11-09 10:34:28 +00:00
lists banned words in the chat error message - fixes https://github.com/HabitRPG/habitica/issues/8812 (#8858) * issue 8812 - added the list of bad words matched to the postChat error message. * issue 8812 - added the list of bad words matched to the postChat error message. * issue 8812 - some refactoring, fixed relevant tests, and lint rules refactor * small fix for unnecessary empty array * added test and did some small refactoring * lint error fix * issue 8812 - added the list of bad words matched to the postChat error message. * issue 8812 - some refactoring, fixed relevant tests, and lint rules refactor * small fix for unnecessary empty array * added test and did some small refactoring * lint error fix * add test to check the error message contains the banned words used * improve banned words test * issue 8812 - added the list of bad words matched to the postChat error message. * issue 8812 - some refactoring, fixed relevant tests, and lint rules refactor * small fix for unnecessary empty array * added test and did some small refactoring * lint error fix * issue 8812 - added the list of bad words matched to the postChat error message. * issue 8812 - some refactoring, fixed relevant tests, and lint rules refactor * add test to check the error message contains the banned words used * improve banned words test * merge with develop - aligned banned slurs check with banned words check 2017-08-02 19:43:22 +00:00			`export function getMatchesByWordArray (str, wordsToMatch) {`
Revert "Revert "fix(banned words): fix partial matching of words containing diacritic… (#12444)"" This reverts commit 5362058f3552d2c410638372fd48bb9a91861d2b. 2020-11-09 10:34:28 +00:00			`// remove accented characters from the string, which would trip up the regEx`
			`// later on, by using the built-in Unicode normalisation methods`
			`// https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/normalize`
			`// https://www.unicode.org/reports/tr15/#Canon_Compat_Equivalence`
			`// https://unicode-table.com/en/#combining-diacritical-marks`

start upgrading eslint 2019-10-08 14:57:10 +00:00			`const matchedWords = [];`
Add checks for profanity to profile updates (#12445) * fix(profile): detect attempt to use banned words as display name. refactor profanity detection method. * fix(profile): detect attempt to use banned words in blurb. further refactor profanity detection. inform the user their chat privileges have been revoked. * refactor: add function to normalize Unicode strings and remove diacritics * fix: improve regEx to prevent false partial matches e.g. 'hello' being recognised as banned words. porting fix from #12309 * fix(profile): refactor of profanity detection for #12445 * fix(profile): add test for swear words in new profile. fix existing tests * fix(profile): show different error message for attempted slur use in username by new users. * fix(profile): remove incorrect slur test * fix(profile): fix slurs not caught at start of end of strings connect by punctuation * tests(profile): fix tests for profanity checking * remove exclusive test * 11865 - update text for slur warnings * 11865 - remove unused string from locale files * 11865 - improve naming of banned word usage locale string * 11865 - improve logic so that differentiated warnings are shown depending on whether a slur or other profanity has been used in a display name * 11865 - construct slur regexes outside the validation function in which they are used * 11865 - fix tests 2021-04-30 20:47:39 +00:00			`const wordRegexs = wordsToMatch.map(word => {`
			`const normalizedWord = removePunctuationFromString(normalizeUnicodeString(word));`
			return new RegExp(`\\b([^a-z]+)?${normalizedWord}([^a-z]+)?\\b`, 'i');
			`});`
lists banned words in the chat error message - fixes https://github.com/HabitRPG/habitica/issues/8812 (#8858) * issue 8812 - added the list of bad words matched to the postChat error message. * issue 8812 - added the list of bad words matched to the postChat error message. * issue 8812 - some refactoring, fixed relevant tests, and lint rules refactor * small fix for unnecessary empty array * added test and did some small refactoring * lint error fix * issue 8812 - added the list of bad words matched to the postChat error message. * issue 8812 - some refactoring, fixed relevant tests, and lint rules refactor * small fix for unnecessary empty array * added test and did some small refactoring * lint error fix * add test to check the error message contains the banned words used * improve banned words test * issue 8812 - added the list of bad words matched to the postChat error message. * issue 8812 - some refactoring, fixed relevant tests, and lint rules refactor * small fix for unnecessary empty array * added test and did some small refactoring * lint error fix * issue 8812 - added the list of bad words matched to the postChat error message. * issue 8812 - some refactoring, fixed relevant tests, and lint rules refactor * add test to check the error message contains the banned words used * improve banned words test * merge with develop - aligned banned slurs check with banned words check 2017-08-02 19:43:22 +00:00			`for (let i = 0; i < wordRegexs.length; i += 1) {`
start upgrading eslint 2019-10-08 14:57:10 +00:00			`const regEx = wordRegexs[i];`
Add checks for profanity to profile updates (#12445) * fix(profile): detect attempt to use banned words as display name. refactor profanity detection method. * fix(profile): detect attempt to use banned words in blurb. further refactor profanity detection. inform the user their chat privileges have been revoked. * refactor: add function to normalize Unicode strings and remove diacritics * fix: improve regEx to prevent false partial matches e.g. 'hello' being recognised as banned words. porting fix from #12309 * fix(profile): refactor of profanity detection for #12445 * fix(profile): add test for swear words in new profile. fix existing tests * fix(profile): show different error message for attempted slur use in username by new users. * fix(profile): remove incorrect slur test * fix(profile): fix slurs not caught at start of end of strings connect by punctuation * tests(profile): fix tests for profanity checking * remove exclusive test * 11865 - update text for slur warnings * 11865 - remove unused string from locale files * 11865 - improve naming of banned word usage locale string * 11865 - improve logic so that differentiated warnings are shown depending on whether a slur or other profanity has been used in a display name * 11865 - construct slur regexes outside the validation function in which they are used * 11865 - fix tests 2021-04-30 20:47:39 +00:00			`const match = removePunctuationFromString(normalizeUnicodeString(str)).match(regEx);`
lists banned words in the chat error message - fixes https://github.com/HabitRPG/habitica/issues/8812 (#8858) * issue 8812 - added the list of bad words matched to the postChat error message. * issue 8812 - added the list of bad words matched to the postChat error message. * issue 8812 - some refactoring, fixed relevant tests, and lint rules refactor * small fix for unnecessary empty array * added test and did some small refactoring * lint error fix * issue 8812 - added the list of bad words matched to the postChat error message. * issue 8812 - some refactoring, fixed relevant tests, and lint rules refactor * small fix for unnecessary empty array * added test and did some small refactoring * lint error fix * add test to check the error message contains the banned words used * improve banned words test * issue 8812 - added the list of bad words matched to the postChat error message. * issue 8812 - some refactoring, fixed relevant tests, and lint rules refactor * small fix for unnecessary empty array * added test and did some small refactoring * lint error fix * issue 8812 - added the list of bad words matched to the postChat error message. * issue 8812 - some refactoring, fixed relevant tests, and lint rules refactor * add test to check the error message contains the banned words used * improve banned words test * merge with develop - aligned banned slurs check with banned words check 2017-08-02 19:43:22 +00:00			`if (match !== null && match[0] !== null) {`
Add checks for profanity to profile updates (#12445) * fix(profile): detect attempt to use banned words as display name. refactor profanity detection method. * fix(profile): detect attempt to use banned words in blurb. further refactor profanity detection. inform the user their chat privileges have been revoked. * refactor: add function to normalize Unicode strings and remove diacritics * fix: improve regEx to prevent false partial matches e.g. 'hello' being recognised as banned words. porting fix from #12309 * fix(profile): refactor of profanity detection for #12445 * fix(profile): add test for swear words in new profile. fix existing tests * fix(profile): show different error message for attempted slur use in username by new users. * fix(profile): remove incorrect slur test * fix(profile): fix slurs not caught at start of end of strings connect by punctuation * tests(profile): fix tests for profanity checking * remove exclusive test * 11865 - update text for slur warnings * 11865 - remove unused string from locale files * 11865 - improve naming of banned word usage locale string * 11865 - improve logic so that differentiated warnings are shown depending on whether a slur or other profanity has been used in a display name * 11865 - construct slur regexes outside the validation function in which they are used * 11865 - fix tests 2021-04-30 20:47:39 +00:00			`const trimmedMatch = match[0].trim();`
lists banned words in the chat error message - fixes https://github.com/HabitRPG/habitica/issues/8812 (#8858) * issue 8812 - added the list of bad words matched to the postChat error message. * issue 8812 - added the list of bad words matched to the postChat error message. * issue 8812 - some refactoring, fixed relevant tests, and lint rules refactor * small fix for unnecessary empty array * added test and did some small refactoring * lint error fix * issue 8812 - added the list of bad words matched to the postChat error message. * issue 8812 - some refactoring, fixed relevant tests, and lint rules refactor * small fix for unnecessary empty array * added test and did some small refactoring * lint error fix * add test to check the error message contains the banned words used * improve banned words test * issue 8812 - added the list of bad words matched to the postChat error message. * issue 8812 - some refactoring, fixed relevant tests, and lint rules refactor * small fix for unnecessary empty array * added test and did some small refactoring * lint error fix * issue 8812 - added the list of bad words matched to the postChat error message. * issue 8812 - some refactoring, fixed relevant tests, and lint rules refactor * add test to check the error message contains the banned words used * improve banned words test * merge with develop - aligned banned slurs check with banned words check 2017-08-02 19:43:22 +00:00			`matchedWords.push(trimmedMatch);`
			`}`
			`}`
			`return matchedWords;`
			`}`