forked from MarseyWorld/MarseyWorld
Change the regex to use lookaheads and lookbehinds so the match is more 'pure'
parent
ff76a4d688
commit
10c8d7def6
|
@ -43,12 +43,13 @@ def create_slur_regex() -> Pattern[str]:
|
|||
# words that can have suffixes and prefixes
|
||||
words = "|".join([slur.lower() for slur in SLURS.keys() if not slur.startswith(" ")])
|
||||
|
||||
regex = rf"(\s|>)({words})|({words})(\s|<)"
|
||||
# to understand the weird groups see: https://www.regular-expressions.info/lookaround.html
|
||||
regex = rf"(?<=\s|>)({words})|({words})(?=\s|<)"
|
||||
|
||||
# words that need to match exactly
|
||||
single_words = "|".join([slur.strip().lower() for slur in SLURS.keys() if slur.startswith(" ")])
|
||||
|
||||
return re.compile(rf"(?i){regex}|(\s|>)({single_words})(\s|<)")
|
||||
return re.compile(rf"(?i){regex}|(?<=\s|>)({single_words})(?=\s|<)")
|
||||
|
||||
|
||||
def create_replace_map() -> Dict[str, str]:
|
||||
|
@ -65,23 +66,13 @@ REPLACE_MAP = create_replace_map()
|
|||
|
||||
def sub_matcher(match: Match) -> str:
|
||||
"""given a match returns the correct replacer string"""
|
||||
|
||||
# base regex: (?i)(\s|>)(words)|(words)(\s|<)|(\s|>)(words)(\s|<)
|
||||
if match.group(2) is not None:
|
||||
found = match.group(2)
|
||||
elif match.group(3) is not None:
|
||||
found = match.group(3)
|
||||
else:
|
||||
found = match.group(6)
|
||||
|
||||
# if it does not find the correct capitalization, it tries the all lower
|
||||
replacer = REPLACE_MAP.get(found) or REPLACE_MAP.get(found.lower())
|
||||
|
||||
return (match.group(1) or match.group(5) or '') + replacer + (match.group(4) or match.group(7) or '')
|
||||
found = match.group(0)
|
||||
# if it does not find the correct capitalization, it tries the all lower, or return the original word
|
||||
return REPLACE_MAP.get(found) or REPLACE_MAP.get(found.lower()) or found
|
||||
|
||||
|
||||
def censor_slurs(body: str, logged_user) -> str:
|
||||
"""Censors all the slurs in the body if the user is not logged in or if they have the slurreplacer active"""
|
||||
"""Censors all the slurs in the body if the user is not logged-in or if they have the slurreplacer active"""
|
||||
|
||||
if not logged_user or logged_user.slurreplacer:
|
||||
try:
|
||||
|
|
|
@ -52,7 +52,7 @@ def test_get_permutations_slur_wiht_link_replacer():
|
|||
" retard ": "r-slur",
|
||||
})
|
||||
def test_create_slur_regex():
|
||||
expected = r"(?i)(\s|>)(kill yourself|faggot)|(kill yourself|faggot)(\s|<)|(\s|>)(nig|retard)(\s|<)"
|
||||
expected = r"(?i)(?<=\s|>)(kill yourself|faggot)|(kill yourself|faggot)(?=\s|<)|(?<=\s|>)(nig|retard)(?=\s|<)"
|
||||
|
||||
assert_that(create_slur_regex()).is_equal_to(re.compile(expected))
|
||||
|
||||
|
@ -91,22 +91,23 @@ def test_create_replace_map():
|
|||
|
||||
@patch("files.helpers.word_censor.REPLACE_MAP", {'retard': 'r-slur', 'Faggot': 'Cute twink', 'NIG': 'π'})
|
||||
def test_sub_matcher():
|
||||
regex = re.compile(r"(?i)(\s|>)(kill yourself|retard)|(kill yourself|retard)(\s|<)|(\s|>)(nig|faggot)(\s|<)")
|
||||
regex = re.compile(
|
||||
r"(?i)(?<=\s|>)(kill yourself|retard)|(kill yourself|retard)(?=\s|<)|(?<=\s|>)(nig|faggot)(?=\s|<)")
|
||||
|
||||
match = regex.search("<p>retard</p>")
|
||||
assert_that(sub_matcher(match)).is_equal_to(">r-slur")
|
||||
assert_that(sub_matcher(match)).is_equal_to("r-slur")
|
||||
|
||||
match = regex.search("<p>noretard</p>")
|
||||
assert_that(sub_matcher(match)).is_equal_to("r-slur<")
|
||||
assert_that(sub_matcher(match)).is_equal_to("r-slur")
|
||||
|
||||
match = regex.search("<p>ReTaRdEd</p>")
|
||||
assert_that(sub_matcher(match)).is_equal_to(">r-slur")
|
||||
assert_that(sub_matcher(match)).is_equal_to("r-slur")
|
||||
|
||||
match = regex.search("<p>NIG</p>")
|
||||
assert_that(sub_matcher(match)).is_equal_to(">π<")
|
||||
assert_that(sub_matcher(match)).is_equal_to("π")
|
||||
|
||||
match = regex.search("<p>Faggot </p>")
|
||||
assert_that(sub_matcher(match)).is_equal_to(">Cute twink ")
|
||||
assert_that(sub_matcher(match)).is_equal_to("Cute twink")
|
||||
|
||||
|
||||
@patch("files.helpers.word_censor.SLURS", {
|
||||
|
@ -163,6 +164,8 @@ def test_censor_slurs():
|
|||
@patch("files.helpers.word_censor.SLURS", {'retard': 'r-slur', 'manlet': 'little king', ' nig ': 'π'})
|
||||
def test_censor_slurs_does_not_error_out_on_exception():
|
||||
word_censor.REPLACE_MAP = create_replace_map()
|
||||
word_censor.SLUR_REGEX = create_slur_regex()
|
||||
word_censor.REPLACE_MAP["manlet"] = None
|
||||
word_censor.REPLACE_MAP["Manlet"] = None
|
||||
|
||||
assert_that(censor_slurs(">retarded SuperManlet NIG<", None)).is_equal_to(">r-slured SuperManlet π<")
|
||||
|
@ -171,6 +174,7 @@ def test_censor_slurs_does_not_error_out_on_exception():
|
|||
@patch("files.helpers.word_censor.SLURS", {'retard': 'r-slur', 'manlet': 'little king'})
|
||||
def test_censor_slurs_does_not_censor_on_flag_disabled():
|
||||
word_censor.REPLACE_MAP = create_replace_map()
|
||||
word_censor.SLUR_REGEX = create_slur_regex()
|
||||
|
||||
class User:
|
||||
def __init__(self, slurreplacer):
|
||||
|
|
Loading…
Reference in New Issue