diff --git a/files/helpers/const.py b/files/helpers/const.py index f3becb9b7..652a63948 100644 --- a/files/helpers/const.py +++ b/files/helpers/const.py @@ -3,140 +3,33 @@ from os import environ site = environ.get("DOMAIN", '').strip() SLURS = { - " faggot":" cute twink", - " Faggot":" Cute twink", - " FAGGOT":" CUTE TWINK", - " fag":" cute twink", - " Fag":" Cute twink", - " FAG":" CUTE TWINK", - " pedophile":" libertarian", - " Pedophile":" Libertarian", - " PEDOPHILE":" LIBERTARIAN", - " pedo":" libertarian", - " Pedo":" Libertarian", - " PEDO":" LIBERTARIAN", - " kill yourself":" keep yourself safe", - " KILL YOURSELF":" KEEP YOURSELF SAFE", - " nigger":" 🏀", - " Nigger":" 🏀", - " NIGGER":" 🏀", - " rapist":" male feminist", - " Rapist":" Male feminist", - " RAPIST":" MALE FEMINIST", - " steve akins":" penny verity oaken", - " Steve Akins":" Penny Verity Oaken", - " STEVE AKINS":" PENNY VERITY OAKEN", - " trannie":" 🚂🚃🚃", - " Trannie":" 🚂🚃🚃", - " TRANNIE":" 🚂🚃🚃", - " tranny":" 🚂🚃🚃", - " Tranny":" 🚂🚃🚃", - " TRANNY":" 🚂🚃🚃", - " troon":" 🚂🚃🚃", - " Troon":" 🚂🚃🚃", - " TROON":" 🚂🚃🚃", - " NoNewNormal": " HorseDewormerAddicts", - " nonewnormal": " horsedewormeraddicts", - " Kike": " https://sciencedirect.com/science/article/abs/pii/S016028960600033X", - " kike": " https://sciencedirect.com/science/article/abs/pii/S016028960600033X", - " retard":" r-slur", - " Retard":" R-slur", - " RETARD":" R-SLUR", - " janny":" j-slur", - " Janny":" J-slur", - " JANNY":" J-SLUR", - " jannie":" j-slur", - " Jannie":" J-slur", - " JANNIE":" J-SLUR", - " janny":" j-slur", - " Janny":" J-slur", - " JANNY":" J-SLUR", - " jannie":" j-slur", - " Jannie":" J-slur", - " JANNIE":" J-SLUR", - " latinos":" latinx", - " latino":" latinx", - " latinas":" latinx", - " latina":" latinx", - " hispanics":" latinx", - " hispanic":" latinx", - " Latinos":" Latinx", - " Latino":" Latinx", - " Latinas":" Latinx", - " Latina":" Latinx", - " Hispanics":" Latinx", - " Hispanic":" Latinx", - " LATINOS":" 
LATINX", - " LATINO":" LATINX", - " LATINAS":" LATINX", - " LATINA":" LATINX", - " HISPANICS":" LATINX", - " HISPANIC":" LATINX", + "faggot": "cute twink", + "fag": " cute twink", + "pedophile": "libertarian", + "pedo": " libertarian", + "kill yourself": "keep yourself safe", + "nigger": "🏀", + "rapist": "male feminist", + "steve akins": "penny verity oaken", + "trannie": "🚂🚃🚃", + "tranny": "🚂🚃🚃", + "troon": "🚂🚃🚃", + "NoNewNormal": " HorseDewormerAddicts", + "kike": " https://sciencedirect.com/science/article/abs/pii/S016028960600033X", + "retard": "r-slur", + "janny": " j-slur", + "jannie": " j-slur", + "janny": " j-slur", + "latinos": "latinx", + "latino": "latinx", + "latinas": "latinx", + "latina": "latinx", + "hispanics": "latinx", + "hispanic": "latinx", - "faggot ":"cute twink ", - "Faggot ":"Cute twink ", - "FAGGOT ":"CUTE TWINK ", - "fag ":"cute twink ", - "Fag ":"Cute twink ", - "FAG ":"CUTE TWINK ", - "pedophile ":"libertarian ", - "Pedophile ":"Libertarian ", - "PEDOPHILE ":"LIBERTARIAN ", - "kill yourself ":"keep yourself safe ", - "KILL YOURSELF ":"KEEP YOURSELF SAFE ", - "nigger ":"🏀 ", - "Nigger ":"🏀 ", - "NIGGER ":"🏀 ", - "steve akins ":"penny verity oaken ", - "Steve Akins ":"Penny Verity Oaken ", - "STEVE AKINS ":"PENNY VERITY OAKEN ", - "trannie ":"🚂🚃🚃 ", - "Trannie ":"🚂🚃🚃 ", - "TRANNIE ":"🚂🚃🚃 ", - "tranny ":"🚂🚃🚃 ", - "Tranny ":"🚂🚃🚃 ", - "TRANNY ":"🚂🚃🚃 ", - "troon ":"🚂🚃🚃 ", - "Troon ":"🚂🚃🚃 ", - "TROON ":"🚂🚃🚃 ", - "NoNewNormal ": "HorseDewormerAddicts ", - "nonewnormal ": "horsedewormeraddicts ", - "Kike ": "https://sciencedirect.com/science/article/abs/pii/S016028960600033X ", - "kike ": "https://sciencedirect.com/science/article/abs/pii/S016028960600033X ", - "retard ":"r-slur ", - "Retard ":"R-slur ", - "RETARD ":"R-SLUR ", - "janny ":"j-slur ", - "Janny ":"J-slur ", - "JANNY ":"J-SLUR ", - "jannie ":"j-slur ", - "Jannie ":"J-slur ", - "JANNIE ":"J-SLUR ", - "latinos ":"latinx ", - "latino ":"latinx ", - "latinas ":"latinx ", - "latina 
":"latinx ", - "hispanics ":"latinx ", - "hispanic ":"latinx ", - "Latinos ":"Latinx ", - "Latino ":"Latinx ", - "Latinas ":"Latinx ", - "Latina ":"Latinx ", - "Hispanics ":"Latinx ", - "Hispanic ":"Latinx ", - "LATINOS ":"LATINX ", - "LATINO ":"LATINX ", - "LATINAS ":"LATINX ", - "LATINA ":"LATINX ", - "HISPANICS ":"LATINX ", - "HISPANIC ":"LATINX ", - - " nig ":" 🏀 ", - " Nig ":" 🏀 ", - " NIG ":" 🏀 ", - " nigs ":" 🏀s ", - " Nigs ":" 🏀s ", - " NIGS ":" 🏀s ", + # if the word has spaces in the beginning and the end it will only censor this word without prefixes or suffixes + " nig ": "🏀", + " nigs ": "🏀s", } LONGPOST_REPLIES = ['Wow, you must be a JP fan.', 'This is one of the worst posts I have EVER seen. Delete it.', "No, don't reply like this, please do another wall of unhinged rant please.", '# 😴😴😴', "Ma'am we've been over this before. You need to stop.", "I've known more coherent downies.", "Your pulitzer's in the mail", "That's great and all, but I asked for my burger without cheese.", 'That degree finally paying off', "That's nice sweaty. Why don't you have a seat in the time out corner with Pizzashill until you calm down, then you can have your Capri Sun.", "All them words won't bring your pa back.", "You had a chance to not be completely worthless, but it looks like you threw it away. At least you're consistent.", 'Some people are able to display their intelligence by going on at length on a subject and never actually saying anything. This ability is most common in trades such as politics, public relations, and law. You have impressed me by being able to best them all, while still coming off as an absolute idiot.', "You can type 10,000 characters and you decided that these were the one's that you wanted.", 'Have you owned the libs yet?', "I don't know what you said, because I've seen another human naked.", 'Impressive. Normally people with such severe developmental disabilities struggle to write much more than a sentence or two. 
He really has exceded our expectations for the writing portion. Sadly the coherency of his writing, along with his abilities in the social skills and reading portions, are far behind his peers with similar disabilities.', "This is a really long way of saying you don't fuck.", "Sorry ma'am, looks like his delusions have gotten worse. We'll have to admit him,", '![](https://i.kym-cdn.com/photos/images/newsfeed/001/038/094/0a1.jpg)', 'If only you could put that energy into your relationships', 'Posts like this is why I do Heroine.', 'still unemployed then?', 'K', 'look im gunna have 2 ask u 2 keep ur giant dumps in the toilet not in my replys 😷😷😷', "Mommy is soooo proud of you, sweaty. Let's put this sperg out up on the fridge with all your other failures.", "Good job bobby, here's a star", "That was a mistake. You're about to find out the hard way why.", 'You sat down and wrote all this shit. You could have done so many other things with your life. What happened to your life that made you decide writing novels of bullshit on rdrama.net was the best option?', "I don't have enough spoons to read this shit", "All those words won't bring daddy back.", 'OUT!'] diff --git a/files/helpers/word_censor.py b/files/helpers/word_censor.py index dabf74cdf..544aee775 100644 --- a/files/helpers/word_censor.py +++ b/files/helpers/word_censor.py @@ -7,10 +7,10 @@ from files.helpers.const import SLURS def create_replace_map(): dicts = [{ - slur: replacer, - slur.title(): replacer.title(), - slur.capitalize(): replacer.capitalize(), - slur.upper(): replacer.upper(), + slur.strip(): replacer, + slur.strip().title(): replacer.title(), + slur.strip().capitalize(): replacer.capitalize(), + slur.strip().upper(): replacer.upper(), } for (slur, replacer) in SLURS.items()] # flattens the list of dict to a single dict @@ -21,22 +21,33 @@ REPLACE_MAP = create_replace_map() def create_variations_slur_regex(slur: str): - variations = [slur, slur.upper(), slur.capitalize()] + stripped = 
slur.strip()
+    variations = [stripped, stripped.upper(), stripped.capitalize()]
 
     # capitalize multiple words if there are multiple words (just in case)
-    if " " in slur:
-        variations.append(slur.title())
+    if " " in stripped:
+        variations.append(stripped.title())
 
-    return [rf"(\s|>)({var})|({var})(\s|<)" for var in variations]
+    if slur.startswith(" ") and slur.endswith(" "):
+        return [rf"(\s|>)({var})(\s|<)" for var in variations]
+    else:
+        return [rf"(\s|>)({var})|({var})(\s|<)" for var in variations]
 
 
 def sub_matcher(match: Match):
-    found = match.group(2) if (match.group(2) is not None) else match.group(3)
-    replacer = REPLACE_MAP[found]
-    return (match.group(1) or '') + replacer + (match.group(4) or '')
+    # special case when it should match exact word
+    if len(match.groups()) == 3:
+        found = match.group(2)
+        replacer = REPLACE_MAP[found]
+        return match.group(1) + replacer + match.group(3)
+
+    else: # normal case with prefix or suffix
+        found = match.group(2) if (match.group(2) is not None) else match.group(3)
+        replacer = REPLACE_MAP[found]
+        return (match.group(1) or '') + replacer + (match.group(4) or '')
 
 
-def censor_slurs(v, body):
+def censor_slurs(v, body: str):
     if v and not v.slurreplacer:
         return body
 
@@ -44,7 +55,7 @@ def censor_slurs(v, body):
         for variation in create_variations_slur_regex(slur):
             try:
                 body = re.sub(variation, sub_matcher, body)
-            except:
-                pass
+            except Exception as e:
+                print(e)
 
     return body
diff --git a/test/files/helpers/test_word_censor.py b/test/files/helpers/test_word_censor.py
index 86b35a259..5ffba82ce 100644
--- a/test/files/helpers/test_word_censor.py
+++ b/test/files/helpers/test_word_censor.py
@@ -7,6 +7,16 @@ from files.helpers import word_censor
 from files.helpers.word_censor import create_variations_slur_regex, create_replace_map, censor_slurs, sub_matcher
 
 
+def test_create_variations_slur_regex_for_slur_with_spaces():
+    expected = [r"(\s|>)(retard)(\s|<)",
+                r"(\s|>)(Retard)(\s|<)",
+                r"(\s|>)(RETARD)(\s|<)"]
+
+    result = 
create_variations_slur_regex(" retard ") + + assert_that(result).is_length(3).contains_only(*expected) + + def test_create_variations_slur_regex_single_word(): expected = [r"(\s|>)(retard)|(retard)(\s|<)", r"(\s|>)(Retard)|(Retard)(\s|<)", @@ -31,6 +41,7 @@ def test_create_variations_slur_regex_multiple_word(): "tranny": "🚂🚃🚃", "kill yourself": "keep yourself safe", "faggot": "cute twink", + " nig ": "🏀", }) def test_create_replace_map(): expected = { @@ -44,13 +55,16 @@ def test_create_replace_map(): "faggot": "cute twink", "Faggot": "Cute twink", "FAGGOT": "CUTE TWINK", + "nig": "🏀", + "Nig": "🏀", + "NIG": "🏀", } result = create_replace_map() assert_that(result).is_equal_to(expected) -@patch("files.helpers.word_censor.REPLACE_MAP", {'retard': 'r-slur'}) +@patch("files.helpers.word_censor.REPLACE_MAP", {'retard': 'r-slur', 'NIG': '🏀'}) def test_sub_matcher(): match = re.search(r"(\s|>)(retard)|(retard)(\s|<)", "

retard

") assert_that(sub_matcher(match)).is_equal_to(">r-slur") @@ -58,8 +72,14 @@ def test_sub_matcher(): match = re.search(r"(\s|>)(retard)|(retard)(\s|<)", "

noretard

") assert_that(sub_matcher(match)).is_equal_to("r-slur<") + match = re.search(r"(\s|>)(NIG)(\s|<)", "

NIG

") + assert_that(sub_matcher(match)).is_equal_to(">🏀<") -@patch("files.helpers.word_censor.SLURS", {'retard': 'r-slur', 'manlet': 'little king'}) + match = re.search(r"(\s|>)(NIG)(\s|<)", "

NIG

") + assert_that(sub_matcher(match)).is_equal_to(">🏀 ") + + +@patch("files.helpers.word_censor.SLURS", {'retard': 'r-slur', 'manlet': 'little king', ' nig ': '🏀'}) def test_censor_slurs(): word_censor.REPLACE_MAP = create_replace_map() @@ -68,13 +88,30 @@ def test_censor_slurs(): assert_that(censor_slurs(None, "that is Retarded like")).is_equal_to("that is R-slured like") assert_that(censor_slurs(None, "that is SUPERRETARD like")).is_equal_to("that is SUPERR-SLUR like") assert_that(censor_slurs(None, "

Manlets get out!

")).is_equal_to("

Little kings get out!

") + assert_that(censor_slurs(None, '... "retard" ...')).is_equal_to('... "retard" ...') assert_that(censor_slurs(None, '... ReTaRd ...')).is_equal_to('... ReTaRd ...') - assert_that(censor_slurs(None, '... aretarded ...')).is_equal_to('... aretarded ...') + assert_that(censor_slurs(None, '... xretardx ...')).is_equal_to('... xretardx ...') + assert_that(censor_slurs(None, "LLM is a manlet hehe")).is_equal_to("LLM is a little king hehe") assert_that(censor_slurs(None, "LLM is :marseycapitalistmanlet: hehe")) \ .is_equal_to("LLM is :marseycapitalistmanlet: hehe") + assert_that(censor_slurs(None, '... Nig ...')).is_equal_to('... 🏀 ...') + assert_that(censor_slurs(None, '

NIG

')).is_equal_to('

🏀

') + assert_that(censor_slurs(None, '... nigeria ...')).is_equal_to('... nigeria ...') + + assert_that(censor_slurs(None, "

retarded SuperManlet NIG

")) \ + .is_equal_to("

r-slured SuperLittle king 🏀

") + + +@patch("files.helpers.word_censor.SLURS", {'retard': 'r-slur', 'manlet': 'little king', ' nig ': '🏀'}) +def test_censor_slurs_does_not_error_out_on_exception(): + word_censor.REPLACE_MAP = create_replace_map() + word_censor.REPLACE_MAP["Manlet"] = None + + assert_that(censor_slurs(None, ">retarded SuperManlet NIG<")).is_equal_to(">r-slured SuperManlet 🏀<") + @patch("files.helpers.word_censor.SLURS", {'retard': 'r-slur', 'manlet': 'little king'}) def test_censor_slurs_does_not_censor_on_flag_disabled():