Now the slur replacer only matches full words

master
Yo Mama 2021-10-19 23:11:19 +02:00
parent e2c7a74cdd
commit 3602a6446e
3 changed files with 21 additions and 48 deletions

View File

@ -5,35 +5,20 @@ site = environ.get("DOMAIN", '').strip()
#####################
# Formatting rules: #
#####################
#
# on the slur side, they will match prefixes and suffixes and not middle of words, so for example
# "retard" will match:
# - "retard"
# - "retarded"
# - "superretard"
# But not "superretarded"
#
# If all letters are lowercase then it will match the all-lowercase form, the form with the first letter capitalized (in the first word or in every word), and the all-uppercase form
# "dancing israelis" will match (with prefixes and suffixes omitted for brevity):
# "dancing israelis" will match:
# - "dancing israelis"
# - "Dancing israelis"
# - "Dancing Israelis"
# - "DANCING ISRAELIS"
#
# If some letters are uppercase, the same rules apply, with the additional option of the original casing; already-existing uppercase letters are respected
# "NoNewNormal" will match (with prefixes and suffixes omitted for brevity):
# "NoNewNormal" will match:
# - "NoNewNormal"
# - "nonewnormal"
# - "Nonewnormal"
# - "NONEWNORMAL"
#
# If the slur has a space before and after then the match is limited to the exact word, no prefixes or suffixes
# (previous rules about capitalization still apply)
# " neg " will match only:
# - "neg"
# - "Neg"
# - "NEG"
#
# Now on the replacement side, the replacement will have the same capitalization as the slur if the replacement is lowercase
# "kill yourself" -> "keep yourself safe"
# "Kill yourself" -> "Keep yourself safe"
@ -48,7 +33,6 @@ site = environ.get("DOMAIN", '').strip()
#
# There is a super special case that if the replacer starts with "http" then it never changes capitalization
#
#
# TL;DR: Just read the above once, or don't, and try to guess!
SLURS = {
"faggot": "cute twink",
@ -113,10 +97,8 @@ SLURS = {
"china flu": "SARS-CoV-2 syndemic",
"china virus": "SARS-CoV-2 syndemic",
"kung flu": "SARS-CoV-2 syndemic",
# if the word has spaces in the beginning and the end it will only censor this word without prefixes or suffixes
" nig ": "πŸ€",
" nigs ": "πŸ€s",
"nig": "πŸ€",
"nigs": "πŸ€s",
}
LONGPOST_REPLIES = ['Wow, you must be a JP fan.', 'This is one of the worst posts I have EVER seen. Delete it.', "No, don't reply like this, please do another wall of unhinged rant please.", '# 😴😴😴', "Ma'am we've been over this before. You need to stop.", "I've known more coherent downies.", "Your pulitzer's in the mail", "That's great and all, but I asked for my burger without cheese.", 'That degree finally paying off', "That's nice sweaty. Why don't you have a seat in the time out corner with Pizzashill until you calm down, then you can have your Capri Sun.", "All them words won't bring your pa back.", "You had a chance to not be completely worthless, but it looks like you threw it away. At least you're consistent.", 'Some people are able to display their intelligence by going on at length on a subject and never actually saying anything. This ability is most common in trades such as politics, public relations, and law. You have impressed me by being able to best them all, while still coming off as an absolute idiot.', "You can type 10,000 characters and you decided that these were the one's that you wanted.", 'Have you owned the libs yet?', "I don't know what you said, because I've seen another human naked.", 'Impressive. Normally people with such severe developmental disabilities struggle to write much more than a sentence or two. He really has exceded our expectations for the writing portion. Sadly the coherency of his writing, along with his abilities in the social skills and reading portions, are far behind his peers with similar disabilities.', "This is a really long way of saying you don't fuck.", "Sorry ma'am, looks like his delusions have gotten worse. 
We'll have to admit him,", '![](https://i.kym-cdn.com/photos/images/newsfeed/001/038/094/0a1.jpg)', 'If only you could put that energy into your relationships', 'Posts like this is why I do Heroine.', 'still unemployed then?', 'K', 'look im gunna have 2 ask u 2 keep ur giant dumps in the toilet not in my replys 😷😷😷', "Mommy is soooo proud of you, sweaty. Let's put this sperg out up on the fridge with all your other failures.", "Good job bobby, here's a star", "That was a mistake. You're about to find out the hard way why.", 'You sat down and wrote all this shit. You could have done so many other things with your life. What happened to your life that made you decide writing novels of bullshit on rdrama.net was the best option?', "I don't have enough spoons to read this shit", "All those words won't bring daddy back.", 'OUT!']

View File

@ -40,20 +40,14 @@ def get_permutations_slur(slur: str, replacer: str = "_") -> Dict[str, str]:
def create_slur_regex() -> Pattern[str]:
# words that can have suffixes and prefixes
words = "|".join([slur.lower() for slur in SLURS.keys() if not slur.startswith(" ")])
"""Creates the regex that will find the slurs"""
single_words = "|".join([slur.strip().lower() for slur in SLURS.keys()])
# to understand the weird groups see: https://www.regular-expressions.info/lookaround.html
regex = rf"(?<=\s|>)({words})|({words})(?=\s|<)"
# words that need to match exactly
single_words = "|".join([slur.strip().lower() for slur in SLURS.keys() if slur.startswith(" ")])
return re.compile(rf"(?i){regex}|(?<=\s|>)({single_words})(?=\s|<)")
return re.compile(rf"(?i)(?<=\s|>)({single_words})(?=\s|<)")
def create_replace_map() -> Dict[str, str]:
"""Creates the map that will be used to get the mathing replaced for the given slur"""
"""Creates the map that will be used to get the matching replaced for the given slur"""
dicts = [get_permutations_slur(slur, replacer) for (slur, replacer) in SLURS.items()]
# flattens the list of dict to a single dict

View File

@ -48,11 +48,11 @@ def test_get_permutations_slur_wiht_link_replacer():
@patch("files.helpers.word_censor.SLURS", {
"kill yourself": "keep yourself safe",
"faggot": "cute twink",
" nig ": "πŸ€",
" retard ": "r-slur",
"nig": "πŸ€",
"retard": "r-slur",
})
def test_create_slur_regex():
expected = r"(?i)(?<=\s|>)(kill yourself|faggot)|(kill yourself|faggot)(?=\s|<)|(?<=\s|>)(nig|retard)(?=\s|<)"
expected = r"(?i)(?<=\s|>)(kill yourself|faggot|nig|retard)(?=\s|<)"
assert_that(create_slur_regex()).is_equal_to(re.compile(expected))
@ -62,7 +62,7 @@ def test_create_slur_regex():
"kill yourself": "keep yourself safe",
"faggot": "cute twink",
"NoNewNormal": "NoNewNormal",
" nig ": "πŸ€",
"nig": "πŸ€",
})
def test_create_replace_map():
expected = {
@ -97,10 +97,7 @@ def test_sub_matcher():
match = regex.search("<p>retard</p>")
assert_that(sub_matcher(match)).is_equal_to("r-slur")
match = regex.search("<p>noretard</p>")
assert_that(sub_matcher(match)).is_equal_to("r-slur")
match = regex.search("<p>ReTaRdEd</p>")
match = regex.search("<p>ReTaRd</p>")
assert_that(sub_matcher(match)).is_equal_to("r-slur")
match = regex.search("<p>NIG</p>")
@ -113,7 +110,7 @@ def test_sub_matcher():
@patch("files.helpers.word_censor.SLURS", {
'retard': 'r-slur',
'manlet': 'little king',
' nig ': 'πŸ€',
'nig': 'πŸ€',
'i hate Carp': 'i love Carp',
'kike': 'https://sciencedirect.com/science/article/abs/pii/S016028960600033X'
})
@ -122,12 +119,13 @@ def test_censor_slurs():
word_censor.SLUR_REGEX = create_slur_regex()
assert_that(censor_slurs("<p>retard</p>", None)).is_equal_to("<p>r-slur</p>")
assert_that(censor_slurs("<p>preretard</p>", None)).is_equal_to("<p>prer-slur</p>")
assert_that(censor_slurs("that is Retarded like", None)).is_equal_to("that is R-slured like")
assert_that(censor_slurs("that is SUPERRETARD like", None)).is_equal_to("that is SUPERR-SLUR like")
assert_that(censor_slurs('... ReTaRd ...', None)).is_equal_to('... r-slur ...')
assert_that(censor_slurs("<p>Manlets get out!</p>", None)).is_equal_to("<p>Little kings get out!</p>")
assert_that(censor_slurs("<p>Manlet get out!</p>", None)).is_equal_to("<p>Little king get out!</p>")
# does not work:
assert_that(censor_slurs("<p>preretard</p>", None)).is_equal_to("<p>preretard</p>")
assert_that(censor_slurs("that is Retarded like", None)).is_equal_to("that is Retarded like")
assert_that(censor_slurs("that is SUPERRETARD like", None)).is_equal_to("that is SUPERRETARD like")
assert_that(censor_slurs('... "retard" ...', None)).is_equal_to('... "retard" ...')
assert_that(censor_slurs('... xretardx ...', None)).is_equal_to('... xretardx ...')
@ -150,8 +148,7 @@ def test_censor_slurs():
assert_that(censor_slurs('... i hate a carp ...', None)).is_equal_to('... i hate a carp ...')
assert_that(censor_slurs("<p>retarded SuperManlet NIG</p>", None)) \
.is_equal_to("<p>r-slured SuperLittle king πŸ€</p>")
assert_that(censor_slurs("<p>retard Manlet NIG</p>", None)).is_equal_to("<p>r-slur Little king πŸ€</p>")
assert_that(censor_slurs('... kike ...', None)) \
.is_equal_to('... https://sciencedirect.com/science/article/abs/pii/S016028960600033X ...')
@ -168,7 +165,7 @@ def test_censor_slurs_does_not_error_out_on_exception():
word_censor.REPLACE_MAP["manlet"] = None
word_censor.REPLACE_MAP["Manlet"] = None
assert_that(censor_slurs(">retarded SuperManlet NIG<", None)).is_equal_to(">r-slured SuperManlet πŸ€<")
assert_that(censor_slurs(">retard Manlet NIG<", None)).is_equal_to(">r-slur Manlet πŸ€<")
@patch("files.helpers.word_censor.SLURS", {'retard': 'r-slur', 'manlet': 'little king'})