Corrected specific Uppercase slur replacement

remotes/1693045480750635534/spooky-22
Yo Mama 2021-10-17 01:58:10 +02:00
parent ea21b9818f
commit 8dd79e5368
3 changed files with 122 additions and 28 deletions

View File

@ -13,11 +13,10 @@ site = environ.get("DOMAIN", '').strip()
# - "superretard"
# But not "superretarded"
#
# If all letters are lowercase then it will match lowercase, all variations of first letter of words up and all letters up
# If all letters are lowercase then it will match lowercase, first letter up in first or all the words and all letters up
# "dancing israelis" will match (with prefixes and suffixes omitted for brevity):
# - "dancing israelis"
# - "Dancing israelis"
# - "dancing Israelis"
# - "Dancing Israelis"
# - "DANCING ISRAELIS"
#
@ -39,7 +38,6 @@ site = environ.get("DOMAIN", '').strip()
# "kill yourself" -> "keep yourself safe"
# "Kill yourself" -> "Keep yourself safe"
# "Kill Yourself" -> "Keep Yourself Safe"
# "kill Yourself" -> "Keep yourself safe" (this one is harder to keep the capitalization, so it defaults to first word upper and the others lower)
# "KILL YOURSELF" -> "KEEP YOURSELF SAFE"
#
# If the replacement side has some capitalization, then that capitalization will always be maintained
@ -48,6 +46,8 @@ site = environ.get("DOMAIN", '').strip()
# "Pajeet" -> "Sexy Indian dude"
# "PAJEET" -> "SEXY INDIAN DUDE"
#
# There is a super special case that if the replacer starts with "http" then it never changes capitalization
#
#
# TL;DR: Just read the above once, or don't, and try to guess!
SLURS = {
@ -74,7 +74,7 @@ SLURS = {
"latina": "latinx",
"hispanics": "latinx",
"hispanic": "latinx",
"uss liberty incident": "tragic accident aboard the USS Liberty",
"USS liberty incident": "tragic accident aboard the USS Liberty",
"lavon affair": "Lavon Misunderstanding",
"shylock": "Israeli friend",
"yid": "Israeli friend",
@ -90,7 +90,7 @@ SLURS = {
"i hate marsey": "i love marsey",
"libertarian": "pedophile",
"billie bilish": "Billie Eilish (fat cow)",
"dancing israelis": "i love Israel",
"dancing Israelis": "i love Israel",
"sodomite": "total dreamboat",
"pajeet": "sexy Indian dude",
"female": "birthing person",
@ -99,7 +99,7 @@ SLURS = {
"renter": "rentoid",
"autistic": "neurodivergent",
"anime": "p-dophilic japanese cartoons",
"holohoax": "I tried to claim the Holocaust didn't happen because I am a pencil-dicked imbecile and the word filter caught me lol",
"holohoax": "i tried to claim the Holocaust didn't happen because I am a pencil-dicked imbecile and the word filter caught me lol",
"groomercord": "discord (actually a pretty cool service)",
"pedocord": "discord (actually a pretty cool service)",
"i hate Carp": "i love Carp",

View File

@ -5,13 +5,42 @@ from re import Match
from files.helpers.const import SLURS
def create_replace_map():
dicts = [{
slur.strip(): replacer,
slur.strip().title(): replacer.title(),
slur.strip().capitalize(): replacer.capitalize(),
slur.strip().upper(): replacer.upper(),
} for (slur, replacer) in SLURS.items()]
def first_upper(phrase: str) -> str:
    """Uppercase only the first character of *phrase*, leaving the rest untouched.

    Unlike str.capitalize(), characters after the first keep their original
    case ("uss Liberty" -> "Uss Liberty", not "Uss liberty").

    Returns the empty string unchanged instead of raising IndexError
    (callers may pass blank words, e.g. from a double space in a phrase).
    """
    if not phrase:
        return phrase
    return phrase[0].upper() + phrase[1:]
def first_all_upper(phrase: str) -> str:
    """Uppercase the first character of every space-separated word, not messing with the others."""
    # Splitting a phrase with no spaces yields a one-word list, so a single
    # expression covers both the one-word and multi-word cases.
    words = phrase.split(" ")
    return " ".join(word[0].upper() + word[1:] for word in words)
def get_permutations_slur(slur: str, replacer: str = "_") -> dict[str, str]:
    """
    Given a slur and a replacer, build every casing permutation of the slur and
    map each one to the correspondingly-cased replacer.
    """
    stripped = slur.strip()
    # special case for the :marseymerchant:'s link replacer — links must keep
    # their exact casing or they break
    keep_as_is = replacer.startswith("http")

    def cased(transform):
        # apply the casing transform unless the replacer is a link
        return replacer if keep_as_is else transform(replacer)

    # insertion order is deliberate: the 'Correctest' casing is written last,
    # so it wins when two permutations collide on the same key
    return {
        stripped.upper(): cased(str.upper),
        first_all_upper(stripped): cased(first_all_upper),
        stripped.lower(): replacer,
        stripped: replacer,
        first_upper(stripped): cased(first_upper),
    }
def create_replace_map() -> dict[str, str]:
    """Create the map that will be used to get the matching replacer for a given slur.

    Each slur contributes every casing permutation of itself; the per-slur
    dicts are then flattened into one lookup dict. ChainMap gives priority to
    earlier dicts on key collisions between different slurs.
    """
    # NOTE: the original annotation was dict[str: str] (a slice, not a type
    # parameterization); corrected to dict[str, str].
    dicts = [get_permutations_slur(slur, replacer) for (slur, replacer) in SLURS.items()]
    # flattens the list of dicts to a single dict
    return dict(ChainMap(*dicts))
@ -20,21 +49,17 @@ def create_replace_map():
# Precomputed slur -> cased-replacement lookup, built once at import time.
REPLACE_MAP = create_replace_map()
def create_variations_slur_regex(slur: str):
stripped = slur.strip()
variations = [stripped, stripped.upper(), stripped.capitalize()]
# capitalize multiple words if there are multiple words (just in case)
if " " in stripped:
variations.append(stripped.title())
def create_variations_slur_regex(slur: str) -> list[str]:
"""For a given match generates the corresponding replacer"""
permutations = get_permutations_slur(slur)
if slur.startswith(" ") and slur.endswith(" "):
return [rf"(\s|>)({var})(\s|<)" for var in variations]
return [rf"(\s|>)({perm})(\s|<)" for perm in permutations.keys()]
else:
return [rf"(\s|>)({var})|({var})(\s|<)" for var in variations]
return [rf"(\s|>)({perm})|({perm})(\s|<)" for perm in permutations.keys()]
def sub_matcher(match: Match):
def sub_matcher(match: Match) -> str:
# special case when it should match exact word
if len(match.groups()) is 3:
found = match.group(2)
@ -47,7 +72,7 @@ def sub_matcher(match: Match):
return (match.group(1) or '') + replacer + (match.group(4) or '')
def censor_slurs(body: str, logged_user):
def censor_slurs(body: str, logged_user) -> str:
if logged_user and not logged_user.slurreplacer:
return body

View File

@ -4,7 +4,45 @@ from unittest.mock import patch
from assertpy import assert_that
from files.helpers import word_censor
from files.helpers.word_censor import create_variations_slur_regex, create_replace_map, censor_slurs, sub_matcher
from files.helpers.word_censor import create_variations_slur_regex, create_replace_map, censor_slurs, sub_matcher, \
get_permutations_slur, first_upper, first_all_upper
def test_first_upper():
    """first_upper uppercases only the first character, leaving the rest alone."""
    cases = {
        "USS liberty": "USS liberty",
        "uss liberty": "Uss liberty",
        "uss Liberty": "Uss Liberty",
    }
    for given, expected in cases.items():
        assert_that(first_upper(given)).is_equal_to(expected)
def test_first_all_upper():
    """first_all_upper uppercases the first character of every word, leaving the rest alone."""
    cases = {
        "USS liberty": "USS Liberty",
        "uss liberty": "Uss Liberty",
        "uss Liberty": "Uss Liberty",
    }
    for given, expected in cases.items():
        assert_that(first_all_upper(given)).is_equal_to(expected)
def test_get_permutations_slur():
    """A multi-word slur yields every casing permutation mapped to a matching-cased replacer."""
    result = get_permutations_slur("USS liberty incident", "tragic accident aboard the USS Liberty")

    # dict equality is order-independent, so only the key/value pairs matter here
    assert_that(result).is_equal_to({
        "uss liberty incident": "tragic accident aboard the USS Liberty",
        "USS liberty incident": "Tragic accident aboard the USS Liberty",
        "USS Liberty Incident": "Tragic Accident Aboard The USS Liberty",
        "USS LIBERTY INCIDENT": "TRAGIC ACCIDENT ABOARD THE USS LIBERTY",
    })
def test_get_permutations_slur_wiht_link_replacer():
    """A replacer that is a link keeps its exact casing for every permutation of the slur."""
    # NOTE(review): "wiht" typo kept in the function name so test discovery/history stays stable
    link = "https://sciencedirect.com/science/article/abs/pii/S016028960600033X"

    result = get_permutations_slur("kike", link)

    assert_that(result).is_equal_to({
        "kike": link,
        "Kike": link,
        "KIKE": link,
    })
def test_create_variations_slur_regex_for_slur_with_spaces():
@ -29,8 +67,8 @@ def test_create_variations_slur_regex_single_word():
def test_create_variations_slur_regex_multiple_word():
expected = [r"(\s|>)(kill yourself)|(kill yourself)(\s|<)",
r"(\s|>)(Kill Yourself)|(Kill Yourself)(\s|<)",
r"(\s|>)(Kill yourself)|(Kill yourself)(\s|<)",
r"(\s|>)(Kill Yourself)|(Kill Yourself)(\s|<)",
r"(\s|>)(KILL YOURSELF)|(KILL YOURSELF)(\s|<)"]
result = create_variations_slur_regex("kill yourself")
@ -41,6 +79,7 @@ def test_create_variations_slur_regex_multiple_word():
"tranny": "πŸš‚πŸšƒπŸšƒ",
"kill yourself": "keep yourself safe",
"faggot": "cute twink",
"NoNewNormal": "NoNewNormal",
" nig ": "πŸ€",
})
def test_create_replace_map():
@ -50,15 +89,19 @@ def test_create_replace_map():
"TRANNY": "πŸš‚πŸšƒπŸšƒ",
"kill yourself": "keep yourself safe",
"Kill yourself": "Keep yourself safe",
"KILL YOURSELF": "KEEP YOURSELF SAFE",
"Kill Yourself": "Keep Yourself Safe",
"KILL YOURSELF": "KEEP YOURSELF SAFE",
"faggot": "cute twink",
"Faggot": "Cute twink",
"FAGGOT": "CUTE TWINK",
"NoNewNormal": "NoNewNormal",
"nonewnormal": "NoNewNormal",
"NONEWNORMAL": "NONEWNORMAL",
"nig": "πŸ€",
"Nig": "πŸ€",
"NIG": "πŸ€",
}
result = create_replace_map()
assert_that(result).is_equal_to(expected)
@ -79,7 +122,13 @@ def test_sub_matcher():
assert_that(sub_matcher(match)).is_equal_to(">πŸ€ ")
@patch("files.helpers.word_censor.SLURS", {'retard': 'r-slur', 'manlet': 'little king', ' nig ': 'πŸ€'})
@patch("files.helpers.word_censor.SLURS", {
'retard': 'r-slur',
'manlet': 'little king',
' nig ': 'πŸ€',
'i hate Carp': 'i love Carp',
'kike': 'https://sciencedirect.com/science/article/abs/pii/S016028960600033X'
})
def test_censor_slurs():
word_censor.REPLACE_MAP = create_replace_map()
@ -101,9 +150,29 @@ def test_censor_slurs():
assert_that(censor_slurs('<p>NIG</p>', None)).is_equal_to('<p>πŸ€</p>')
assert_that(censor_slurs('... nigeria ...', None)).is_equal_to('... nigeria ...')
assert_that(censor_slurs('... i hate Carp ...', None)).is_equal_to('... i love Carp ...')
assert_that(censor_slurs('... i hate carp ...', None)).is_equal_to('... i love Carp ...')
assert_that(censor_slurs('... I hate Carp ...', None)).is_equal_to('... I love Carp ...')
assert_that(censor_slurs('... I Hate Carp ...', None)).is_equal_to('... I Love Carp ...')
assert_that(censor_slurs('... I HATE CARP ...', None)).is_equal_to('... I LOVE CARP ...')
# Not covered:
assert_that(censor_slurs('... I Hate carp ...', None)).is_equal_to('... I Hate carp ...')
assert_that(censor_slurs('... i Hate Carp ...', None)).is_equal_to('... i Hate Carp ...')
assert_that(censor_slurs('... i Hate carp ...', None)).is_equal_to('... i Hate carp ...')
assert_that(censor_slurs('... i hate a carp ...', None)).is_equal_to('... i hate a carp ...')
assert_that(censor_slurs("<p>retarded SuperManlet NIG</p>", None)) \
.is_equal_to("<p>r-slured SuperLittle king πŸ€</p>")
assert_that(censor_slurs('... kike ...', None)) \
.is_equal_to('... https://sciencedirect.com/science/article/abs/pii/S016028960600033X ...')
assert_that(censor_slurs('... Kike ...', None)) \
.is_equal_to('... https://sciencedirect.com/science/article/abs/pii/S016028960600033X ...')
assert_that(censor_slurs('... KIKE ...', None)) \
.is_equal_to('... https://sciencedirect.com/science/article/abs/pii/S016028960600033X ...')
@patch("files.helpers.word_censor.SLURS", {'retard': 'r-slur', 'manlet': 'little king', ' nig ': 'πŸ€'})
def test_censor_slurs_does_not_error_out_on_exception():