forked from rDrama/rDrama
1
0
Fork 0

Now the slur replacer only matches full words

master
Yo Mama 2021-10-19 23:11:19 +02:00
parent e2c7a74cdd
commit 3602a6446e
3 changed files with 21 additions and 48 deletions

View File

@ -5,35 +5,20 @@ site = environ.get("DOMAIN", '').strip()
#####################
# Formatting rules: #
#####################
#
# on the slur side, they will match prefixes and suffixes and not middle of words, so for example
# "retard" will match:
# - "retard"
# - "retarded"
# - "superretard"
# But not "superretarded"
#
# If all letters are lowercase then it will match lowercase, first letter up in first or all the words and all letters up
# "dancing israelis" will match (with prefixes and suffixes omitted for brevity):
# "dancing israelis" will match:
# - "dancing israelis"
# - "Dancing israelis"
# - "Dancing Israelis"
# - "DANCING ISRAELIS"
#
# If some letters are Uppercase, the same, but with the additional option of the original casing, and respecting already existing uppercase
# "NoNewNormal" will match (with prefixes and suffixes omitted for brevity):
# "NoNewNormal" will match:
# - "NoNewNormal"
# - "nonewnormal"
# - "Nonewnormal"
# - "NONEWNORMAL"
#
# If the slur has a space before and after then the match is limited to the exact word, no prefixes or suffixes
# (previous rules about capitalization still apply)
# " neg " will match only:
# - "neg"
# - "Neg"
# - "NEG"
#
# Now on the replacement side, The replacement will have the same capitalization as the slur if the replacement is lowercase
# "kill yourself" -> "keep yourself safe"
# "Kill yourself" -> "Keep yourself safe"
@ -48,7 +33,6 @@ site = environ.get("DOMAIN", '').strip()
#
# There is a super special case that if the replacer starts with "http" then it never changes capitalization
#
#
# TL;DR: Just read the above once, or don't, and try to guess!
SLURS = {
"faggot": "cute twink",
@ -113,8 +97,6 @@ SLURS = {
"china flu": "SARS-CoV-2 syndemic",
"china virus": "SARS-CoV-2 syndemic",
"kung flu": "SARS-CoV-2 syndemic",
# if the word has spaces in the beginning and the end it will only censor this word without prefixes or suffixes
"nig": "πŸ€",
"nigs": "πŸ€s",
}

View File

@ -40,20 +40,14 @@ def get_permutations_slur(slur: str, replacer: str = "_") -> Dict[str, str]:
def create_slur_regex() -> Pattern[str]:
# words that can have suffixes and prefixes
words = "|".join([slur.lower() for slur in SLURS.keys() if not slur.startswith(" ")])
"""Creates the regex that will find the slurs"""
single_words = "|".join([slur.strip().lower() for slur in SLURS.keys()])
# to understand the weird groups see: https://www.regular-expressions.info/lookaround.html
regex = rf"(?<=\s|>)({words})|({words})(?=\s|<)"
# words that need to match exactly
single_words = "|".join([slur.strip().lower() for slur in SLURS.keys() if slur.startswith(" ")])
return re.compile(rf"(?i){regex}|(?<=\s|>)({single_words})(?=\s|<)")
return re.compile(rf"(?i)(?<=\s|>)({single_words})(?=\s|<)")
def create_replace_map() -> Dict[str, str]:
"""Creates the map that will be used to get the mathing replaced for the given slur"""
"""Creates the map that will be used to get the matching replaced for the given slur"""
dicts = [get_permutations_slur(slur, replacer) for (slur, replacer) in SLURS.items()]
# flattens the list of dict to a single dict

View File

@ -52,7 +52,7 @@ def test_get_permutations_slur_wiht_link_replacer():
"retard": "r-slur",
})
def test_create_slur_regex():
expected = r"(?i)(?<=\s|>)(kill yourself|faggot)|(kill yourself|faggot)(?=\s|<)|(?<=\s|>)(nig|retard)(?=\s|<)"
expected = r"(?i)(?<=\s|>)(kill yourself|faggot|nig|retard)(?=\s|<)"
assert_that(create_slur_regex()).is_equal_to(re.compile(expected))
@ -97,10 +97,7 @@ def test_sub_matcher():
match = regex.search("<p>retard</p>")
assert_that(sub_matcher(match)).is_equal_to("r-slur")
match = regex.search("<p>noretard</p>")
assert_that(sub_matcher(match)).is_equal_to("r-slur")
match = regex.search("<p>ReTaRdEd</p>")
match = regex.search("<p>ReTaRd</p>")
assert_that(sub_matcher(match)).is_equal_to("r-slur")
match = regex.search("<p>NIG</p>")
@ -122,12 +119,13 @@ def test_censor_slurs():
word_censor.SLUR_REGEX = create_slur_regex()
assert_that(censor_slurs("<p>retard</p>", None)).is_equal_to("<p>r-slur</p>")
assert_that(censor_slurs("<p>preretard</p>", None)).is_equal_to("<p>prer-slur</p>")
assert_that(censor_slurs("that is Retarded like", None)).is_equal_to("that is R-slured like")
assert_that(censor_slurs("that is SUPERRETARD like", None)).is_equal_to("that is SUPERR-SLUR like")
assert_that(censor_slurs('... ReTaRd ...', None)).is_equal_to('... r-slur ...')
assert_that(censor_slurs("<p>Manlets get out!</p>", None)).is_equal_to("<p>Little kings get out!</p>")
assert_that(censor_slurs("<p>Manlet get out!</p>", None)).is_equal_to("<p>Little king get out!</p>")
# does not work:
assert_that(censor_slurs("<p>preretard</p>", None)).is_equal_to("<p>preretard</p>")
assert_that(censor_slurs("that is Retarded like", None)).is_equal_to("that is Retarded like")
assert_that(censor_slurs("that is SUPERRETARD like", None)).is_equal_to("that is SUPERRETARD like")
assert_that(censor_slurs('... "retard" ...', None)).is_equal_to('... "retard" ...')
assert_that(censor_slurs('... xretardx ...', None)).is_equal_to('... xretardx ...')
@ -150,8 +148,7 @@ def test_censor_slurs():
assert_that(censor_slurs('... i hate a carp ...', None)).is_equal_to('... i hate a carp ...')
assert_that(censor_slurs("<p>retarded SuperManlet NIG</p>", None)) \
.is_equal_to("<p>r-slured SuperLittle king 🏀</p>")
assert_that(censor_slurs("<p>retard Manlet NIG</p>", None)).is_equal_to("<p>r-slur Little king 🏀</p>")
assert_that(censor_slurs('... kike ...', None)) \
.is_equal_to('... https://sciencedirect.com/science/article/abs/pii/S016028960600033X ...')
@ -168,7 +165,7 @@ def test_censor_slurs_does_not_error_out_on_exception():
word_censor.REPLACE_MAP["manlet"] = None
word_censor.REPLACE_MAP["Manlet"] = None
assert_that(censor_slurs(">retarded SuperManlet NIG<", None)).is_equal_to(">r-slured SuperManlet 🏀<")
assert_that(censor_slurs(">retard Manlet NIG<", None)).is_equal_to(">r-slur Manlet 🏀<")
@patch("files.helpers.word_censor.SLURS", {'retard': 'r-slur', 'manlet': 'little king'})