diff --git a/files/helpers/const.py b/files/helpers/const.py index 8b17b241b..7593e82a3 100644 --- a/files/helpers/const.py +++ b/files/helpers/const.py @@ -13,11 +13,10 @@ site = environ.get("DOMAIN", '').strip() # - "superretard" # But not "superretarded" # -# If all letters are lowercase then it will match lowercase, all variations of first letter of words up and all letters up +# If all letters are lowercase then it will match lowercase, first letter up in first or all the words and all letters up # "dancing israelis" will match (with prefixes and suffixes omitted for brevity): # - "dancing israelis" # - "Dancing israelis" -# - "dancing Israelis" # - "Dancing Israelis" # - "DANCING ISRAELIS" # @@ -39,7 +38,6 @@ site = environ.get("DOMAIN", '').strip() # "kill yourself" -> "keep yourself safe" # "Kill yourself" -> "Keep yourself safe" # "Kill Yourself" -> "Keep Yourself Safe" -# "kill Yourself" -> "Keep yourself safe" (this one is harder to keep the capitalizaion, so it defaults to first word upper and the others lower) # "KILL YOURSELF" -> "KEEP YOURSELF SAFE" # # If the replacement side has some capitalization, then that capitalization will always be maintained @@ -48,6 +46,8 @@ site = environ.get("DOMAIN", '').strip() # "Pajeet" -> "Sexy Indian dude" # "PAJEET" -> "SEXY INDIAN DUDE" # +# There is a super special case that if the replacer starts with "http" then it never changes capitalization +# # # TL;DR: Just read the above once, or don't, and try to guess! 
SLURS = { @@ -74,7 +74,7 @@ SLURS = { "latina": "latinx", "hispanics": "latinx", "hispanic": "latinx", - "uss liberty incident": "tragic accident aboard the USS Liberty", + "USS liberty incident": "tragic accident aboard the USS Liberty", "lavon affair": "Lavon Misunderstanding", "shylock": "Israeli friend", "yid": "Israeli friend", @@ -90,7 +90,7 @@ SLURS = { "i hate marsey": "i love marsey", "libertarian": "pedophile", "billie bilish": "Billie Eilish (fat cow)", - "dancing israelis": "i love Israel", + "dancing Israelis": "i love Israel", "sodomite": "total dreamboat", "pajeet": "sexy Indian dude", "female": "birthing person", @@ -99,7 +99,7 @@ SLURS = { "renter": "rentoid", "autistic": "neurodivergent", "anime": "p-dophilic japanese cartoons", - "holohoax": "I tried to claim the Holocaust didn't happen because I am a pencil-dicked imbecile and the word filter caught me lol", + "holohoax": "i tried to claim the Holocaust didn't happen because I am a pencil-dicked imbecile and the word filter caught me lol", "groomercord": "discord (actually a pretty cool service)", "pedocord": "discord (actually a pretty cool service)", "i hate Carp": "i love Carp", diff --git a/files/helpers/word_censor.py b/files/helpers/word_censor.py index a1e8ab788..e56f82da3 100644 --- a/files/helpers/word_censor.py +++ b/files/helpers/word_censor.py @@ -5,13 +5,42 @@ from re import Match from files.helpers.const import SLURS -def create_replace_map(): - dicts = [{ - slur.strip(): replacer, - slur.strip().title(): replacer.title(), - slur.strip().capitalize(): replacer.capitalize(), - slur.strip().upper(): replacer.upper(), - } for (slur, replacer) in SLURS.items()] +def first_upper(phrase: str) -> str: + """Converts the first character of the phrase to uppercase, not messing with the others""" + return phrase[0].upper() + phrase[1:] + + +def first_all_upper(phrase: str) -> str: + """Converts the first character of each word to uppercase, not messing with the others""" + if " " not in 
phrase: +        return first_upper(phrase) + +    return " ".join([first_upper(word) for word in phrase.split(" ")]) + + +def get_permutations_slur(slur: str, replacer: str = "_") -> dict[str, str]: +    """ +    Given a slur and a replacer, it generates all the possible permutations of the original text and assigns them to the +    corresponding substitution with case +    """ +    stripped = slur.strip() +    is_link = replacer.startswith("http")  # special case for the :marseymerchant: + +    # the order the things are added into the dict is important, so that the 'Correctest' version is written last +    result = { +        stripped.upper(): replacer.upper() if not is_link else replacer, +        first_all_upper(stripped): first_all_upper(replacer) if not is_link else replacer, +        stripped.lower(): replacer, +        stripped: replacer, +        first_upper(stripped): first_upper(replacer) if not is_link else replacer, +    } + +    return result + + +def create_replace_map() -> dict[str, str]: +    """Creates the map that will be used to get the matching replacer for the given slur""" +    dicts = [get_permutations_slur(slur, replacer) for (slur, replacer) in SLURS.items()]      # flattens the list of dict to a single dict     return dict(ChainMap(*dicts)) @@ -20,21 +49,17 @@ def create_replace_map():  REPLACE_MAP = create_replace_map()  -def create_variations_slur_regex(slur: str): -    stripped = slur.strip() -    variations = [stripped, stripped.upper(), stripped.capitalize()] - -    # capitalize multiple words if there are multiple words (just in case) -    if " " in stripped: -        variations.append(stripped.title()) +def create_variations_slur_regex(slur: str) -> list[str]: +    """For a given slur generates the regex variations that will match it""" +    permutations = get_permutations_slur(slur)      if slur.startswith(" ") and slur.endswith(" "): -        return [rf"(\s|>)({var})(\s|<)" for var in variations] +        return [rf"(\s|>)({perm})(\s|<)" for perm in permutations.keys()]     else: -        return [rf"(\s|>)({var})|({var})(\s|<)" for var in variations] +        return
[rf"(\s|>)({perm})|({perm})(\s|<)" for perm in permutations.keys()] -def sub_matcher(match: Match): +def sub_matcher(match: Match) -> str: # special case when it should match exact word if len(match.groups()) is 3: found = match.group(2) @@ -47,7 +72,7 @@ def sub_matcher(match: Match): return (match.group(1) or '') + replacer + (match.group(4) or '') -def censor_slurs(body: str, logged_user): +def censor_slurs(body: str, logged_user) -> str: if logged_user and not logged_user.slurreplacer: return body diff --git a/test/files/helpers/test_word_censor.py b/test/files/helpers/test_word_censor.py index af89081b7..1ee2c8922 100644 --- a/test/files/helpers/test_word_censor.py +++ b/test/files/helpers/test_word_censor.py @@ -4,7 +4,45 @@ from unittest.mock import patch from assertpy import assert_that from files.helpers import word_censor -from files.helpers.word_censor import create_variations_slur_regex, create_replace_map, censor_slurs, sub_matcher +from files.helpers.word_censor import create_variations_slur_regex, create_replace_map, censor_slurs, sub_matcher, \ + get_permutations_slur, first_upper, first_all_upper + + +def test_first_upper(): + assert_that(first_upper("USS liberty")).is_equal_to("USS liberty") + assert_that(first_upper("uss liberty")).is_equal_to("Uss liberty") + assert_that(first_upper("uss Liberty")).is_equal_to("Uss Liberty") + + +def test_first_all_upper(): + assert_that(first_all_upper("USS liberty")).is_equal_to("USS Liberty") + assert_that(first_all_upper("uss liberty")).is_equal_to("Uss Liberty") + assert_that(first_all_upper("uss Liberty")).is_equal_to("Uss Liberty") + + +def test_get_permutations_slur(): + expected = { + "USS liberty incident": "Tragic accident aboard the USS Liberty", + "uss liberty incident": "tragic accident aboard the USS Liberty", + "USS Liberty Incident": "Tragic Accident Aboard The USS Liberty", + "USS LIBERTY INCIDENT": "TRAGIC ACCIDENT ABOARD THE USS LIBERTY", + } + + result = get_permutations_slur("USS liberty 
incident", "tragic accident aboard the USS Liberty") +) +    assert_that(result).is_equal_to(expected) + + +def test_get_permutations_slur_with_link_replacer(): +    expected = { +        "kike": "https://sciencedirect.com/science/article/abs/pii/S016028960600033X", +        "Kike": "https://sciencedirect.com/science/article/abs/pii/S016028960600033X", +        "KIKE": "https://sciencedirect.com/science/article/abs/pii/S016028960600033X", +    } + +    result = get_permutations_slur("kike", "https://sciencedirect.com/science/article/abs/pii/S016028960600033X") + +    assert_that(result).is_equal_to(expected)   def test_create_variations_slur_regex_for_slur_with_spaces(): @@ -29,8 +67,8 @@ def test_create_variations_slur_regex_single_word():  def test_create_variations_slur_regex_multiple_word():     expected = [r"(\s|>)(kill yourself)|(kill yourself)(\s|<)", -                r"(\s|>)(Kill Yourself)|(Kill Yourself)(\s|<)",                 r"(\s|>)(Kill yourself)|(Kill yourself)(\s|<)", +                r"(\s|>)(Kill Yourself)|(Kill Yourself)(\s|<)",                 r"(\s|>)(KILL YOURSELF)|(KILL YOURSELF)(\s|<)"]      result = create_variations_slur_regex("kill yourself") @@ -41,6 +79,7 @@ def test_create_variations_slur_regex_multiple_word():     "tranny": "🚂🚃🚃",     "kill yourself": "keep yourself safe",     "faggot": "cute twink", +    "NoNewNormal": "NoNewNormal",     " nig ": "🏀", }) def test_create_replace_map(): @@ -50,15 +89,19 @@ def test_create_replace_map():         "TRANNY": "🚂🚃🚃",         "kill yourself": "keep yourself safe",         "Kill yourself": "Keep yourself safe", -        "KILL YOURSELF": "KEEP YOURSELF SAFE",         "Kill Yourself": "Keep Yourself Safe", +        "KILL YOURSELF": "KEEP YOURSELF SAFE",         "faggot": "cute twink",         "Faggot": "Cute twink",         "FAGGOT": "CUTE TWINK", +        "NoNewNormal": "NoNewNormal", +        "nonewnormal": "NoNewNormal", +        "NONEWNORMAL": "NONEWNORMAL",         "nig": "🏀",         "Nig": "🏀",         "NIG": "🏀",     } +      result = create_replace_map()      assert_that(result).is_equal_to(expected) @@ -79,7 +122,13 @@ def test_sub_matcher():     assert_that(sub_matcher(match)).is_equal_to(">🏀 ")   -@patch("files.helpers.word_censor.SLURS",
{'retard': 'r-slur', 'manlet': 'little king', ' nig ': '🏀'}) +@patch("files.helpers.word_censor.SLURS", { + 'retard': 'r-slur', + 'manlet': 'little king', + ' nig ': '🏀', + 'i hate Carp': 'i love Carp', + 'kike': 'https://sciencedirect.com/science/article/abs/pii/S016028960600033X' +}) def test_censor_slurs(): word_censor.REPLACE_MAP = create_replace_map() @@ -101,9 +150,29 @@ def test_censor_slurs(): assert_that(censor_slurs('

NIG

', None)).is_equal_to('

🏀

') assert_that(censor_slurs('... nigeria ...', None)).is_equal_to('... nigeria ...') + assert_that(censor_slurs('... i hate Carp ...', None)).is_equal_to('... i love Carp ...') + assert_that(censor_slurs('... i hate carp ...', None)).is_equal_to('... i love Carp ...') + assert_that(censor_slurs('... I hate Carp ...', None)).is_equal_to('... I love Carp ...') + assert_that(censor_slurs('... I Hate Carp ...', None)).is_equal_to('... I Love Carp ...') + assert_that(censor_slurs('... I HATE CARP ...', None)).is_equal_to('... I LOVE CARP ...') + + # Not covered: + assert_that(censor_slurs('... I Hate carp ...', None)).is_equal_to('... I Hate carp ...') + assert_that(censor_slurs('... i Hate Carp ...', None)).is_equal_to('... i Hate Carp ...') + assert_that(censor_slurs('... i Hate carp ...', None)).is_equal_to('... i Hate carp ...') + + assert_that(censor_slurs('... i hate a carp ...', None)).is_equal_to('... i hate a carp ...') + assert_that(censor_slurs("

retarded SuperManlet NIG

", None)) \ .is_equal_to("

r-slured SuperLittle king 🏀

") + assert_that(censor_slurs('... kike ...', None)) \ + .is_equal_to('... https://sciencedirect.com/science/article/abs/pii/S016028960600033X ...') + assert_that(censor_slurs('... Kike ...', None)) \ + .is_equal_to('... https://sciencedirect.com/science/article/abs/pii/S016028960600033X ...') + assert_that(censor_slurs('... KIKE ...', None)) \ + .is_equal_to('... https://sciencedirect.com/science/article/abs/pii/S016028960600033X ...') + @patch("files.helpers.word_censor.SLURS", {'retard': 'r-slur', 'manlet': 'little king', ' nig ': '🏀'}) def test_censor_slurs_does_not_error_out_on_exception():