forked from rDrama/rDrama
Merge pull request #77 from Aevann1/only_matches_full_words
Now the slur replacer only matches full words
master
commit
b1e2648d3e
|
@ -5,35 +5,20 @@ site = environ.get("DOMAIN", '').strip()
|
|||
#####################
|
||||
# Formatting rules: #
|
||||
#####################
|
||||
#
|
||||
# on the slur side, they will match prefixes and suffixes and not middle of words, so for example
|
||||
# "retard" will match:
|
||||
# - "retard"
|
||||
# - "retarded"
|
||||
# - "superretard"
|
||||
# But not "superretarded"
|
||||
#
|
||||
# If all letters are lowercase then it will match lowercase, first letter up in first or all the words and all letters up
|
||||
# "dancing israelis" will match (with prefixes and suffixes omitted for brevity):
|
||||
# "dancing israelis" will match:
|
||||
# - "dancing israelis"
|
||||
# - "Dancing israelis"
|
||||
# - "Dancing Israelis"
|
||||
# - "DANCING ISRAELIS"
|
||||
#
|
||||
# If some letters are Uppercase, the same, but with the additional option of the original casing, and respecting already existing uppercase
|
||||
# "NoNewNormal" will match (with prefixes and suffixes omitted for brevity):
|
||||
# "NoNewNormal" will match:
|
||||
# - "NoNewNormal"
|
||||
# - "nonewnormal"
|
||||
# - "Nonewnormal"
|
||||
# - "NONEWNORMAL"
|
||||
#
|
||||
# If the slur has a space before and after then the match is limited to the exact word, no prefixes or suffixes
|
||||
# (previous rules about capitalization still apply)
|
||||
# " neg " will match only:
|
||||
# - "neg"
|
||||
# - "Neg"
|
||||
# - "NEG"
|
||||
#
|
||||
# Now on the replacement side, The replacement will have the same capitalization as the slur if the replacement is lowercase
|
||||
# "kill yourself" -> "keep yourself safe"
|
||||
# "Kill yourself" -> "Keep yourself safe"
|
||||
|
@ -48,7 +33,6 @@ site = environ.get("DOMAIN", '').strip()
|
|||
#
|
||||
# There is a super special case that if the replacer starts with "http" then it never changes capitalization
|
||||
#
|
||||
#
|
||||
# TL;DR: Just read the above once, or don't, and try to guess!
|
||||
SLURS = {
|
||||
"faggot": "cute twink",
|
||||
|
@ -113,8 +97,6 @@ SLURS = {
|
|||
"china flu": "SARS-CoV-2 syndemic",
|
||||
"china virus": "SARS-CoV-2 syndemic",
|
||||
"kung flu": "SARS-CoV-2 syndemic",
|
||||
|
||||
# if the word has spaces in the beginning and the end it will only censor this word without prefixes or suffixes
|
||||
"nig": "π",
|
||||
"nigs": "πs",
|
||||
}
|
||||
|
|
|
@ -40,20 +40,14 @@ def get_permutations_slur(slur: str, replacer: str = "_") -> Dict[str, str]:
|
|||
|
||||
|
||||
def create_slur_regex() -> Pattern[str]:
|
||||
# words that can have suffixes and prefixes
|
||||
words = "|".join([slur.lower() for slur in SLURS.keys() if not slur.startswith(" ")])
|
||||
"""Creates the regex that will find the slurs"""
|
||||
single_words = "|".join([slur.strip().lower() for slur in SLURS.keys()])
|
||||
|
||||
# to understand the weird groups see: https://www.regular-expressions.info/lookaround.html
|
||||
regex = rf"(?<=\s|>)({words})|({words})(?=\s|<)"
|
||||
|
||||
# words that need to match exactly
|
||||
single_words = "|".join([slur.strip().lower() for slur in SLURS.keys() if slur.startswith(" ")])
|
||||
|
||||
return re.compile(rf"(?i){regex}|(?<=\s|>)({single_words})(?=\s|<)")
|
||||
return re.compile(rf"(?i)(?<=\s|>)({single_words})(?=\s|<)")
|
||||
|
||||
|
||||
def create_replace_map() -> Dict[str, str]:
|
||||
"""Creates the map that will be used to get the mathing replaced for the given slur"""
|
||||
"""Creates the map that will be used to get the matching replaced for the given slur"""
|
||||
dicts = [get_permutations_slur(slur, replacer) for (slur, replacer) in SLURS.items()]
|
||||
|
||||
# flattens the list of dict to a single dict
|
||||
|
|
|
@ -52,7 +52,7 @@ def test_get_permutations_slur_wiht_link_replacer():
|
|||
"retard": "r-slur",
|
||||
})
|
||||
def test_create_slur_regex():
|
||||
expected = r"(?i)(?<=\s|>)(kill yourself|faggot)|(kill yourself|faggot)(?=\s|<)|(?<=\s|>)(nig|retard)(?=\s|<)"
|
||||
expected = r"(?i)(?<=\s|>)(kill yourself|faggot|nig|retard)(?=\s|<)"
|
||||
|
||||
assert_that(create_slur_regex()).is_equal_to(re.compile(expected))
|
||||
|
||||
|
@ -97,10 +97,7 @@ def test_sub_matcher():
|
|||
match = regex.search("<p>retard</p>")
|
||||
assert_that(sub_matcher(match)).is_equal_to("r-slur")
|
||||
|
||||
match = regex.search("<p>noretard</p>")
|
||||
assert_that(sub_matcher(match)).is_equal_to("r-slur")
|
||||
|
||||
match = regex.search("<p>ReTaRdEd</p>")
|
||||
match = regex.search("<p>ReTaRd</p>")
|
||||
assert_that(sub_matcher(match)).is_equal_to("r-slur")
|
||||
|
||||
match = regex.search("<p>NIG</p>")
|
||||
|
@ -122,12 +119,13 @@ def test_censor_slurs():
|
|||
word_censor.SLUR_REGEX = create_slur_regex()
|
||||
|
||||
assert_that(censor_slurs("<p>retard</p>", None)).is_equal_to("<p>r-slur</p>")
|
||||
assert_that(censor_slurs("<p>preretard</p>", None)).is_equal_to("<p>prer-slur</p>")
|
||||
assert_that(censor_slurs("that is Retarded like", None)).is_equal_to("that is R-slured like")
|
||||
assert_that(censor_slurs("that is SUPERRETARD like", None)).is_equal_to("that is SUPERR-SLUR like")
|
||||
assert_that(censor_slurs('... ReTaRd ...', None)).is_equal_to('... r-slur ...')
|
||||
assert_that(censor_slurs("<p>Manlets get out!</p>", None)).is_equal_to("<p>Little kings get out!</p>")
|
||||
assert_that(censor_slurs("<p>Manlet get out!</p>", None)).is_equal_to("<p>Little king get out!</p>")
|
||||
|
||||
# does not work:
|
||||
assert_that(censor_slurs("<p>preretard</p>", None)).is_equal_to("<p>preretard</p>")
|
||||
assert_that(censor_slurs("that is Retarded like", None)).is_equal_to("that is Retarded like")
|
||||
assert_that(censor_slurs("that is SUPERRETARD like", None)).is_equal_to("that is SUPERRETARD like")
|
||||
assert_that(censor_slurs('... "retard" ...', None)).is_equal_to('... "retard" ...')
|
||||
assert_that(censor_slurs('... xretardx ...', None)).is_equal_to('... xretardx ...')
|
||||
|
||||
|
@ -150,8 +148,7 @@ def test_censor_slurs():
|
|||
|
||||
assert_that(censor_slurs('... i hate a carp ...', None)).is_equal_to('... i hate a carp ...')
|
||||
|
||||
assert_that(censor_slurs("<p>retarded SuperManlet NIG</p>", None)) \
|
||||
.is_equal_to("<p>r-slured SuperLittle king π</p>")
|
||||
assert_that(censor_slurs("<p>retard Manlet NIG</p>", None)).is_equal_to("<p>r-slur Little king π</p>")
|
||||
|
||||
assert_that(censor_slurs('... kike ...', None)) \
|
||||
.is_equal_to('... https://sciencedirect.com/science/article/abs/pii/S016028960600033X ...')
|
||||
|
@ -168,7 +165,7 @@ def test_censor_slurs_does_not_error_out_on_exception():
|
|||
word_censor.REPLACE_MAP["manlet"] = None
|
||||
word_censor.REPLACE_MAP["Manlet"] = None
|
||||
|
||||
assert_that(censor_slurs(">retarded SuperManlet NIG<", None)).is_equal_to(">r-slured SuperManlet π<")
|
||||
assert_that(censor_slurs(">retard Manlet NIG<", None)).is_equal_to(">r-slur Manlet π<")
|
||||
|
||||
|
||||
@patch("files.helpers.word_censor.SLURS", {'retard': 'r-slur', 'manlet': 'little king'})
|
||||
|
|
Loading…
Reference in New Issue