Change the regex to have lookaheads and lookbehinds so the match is more 'pure'

master
Yo Mama 2021-10-19 00:46:10 +02:00
parent ff76a4d688
commit 10c8d7def6
2 changed files with 18 additions and 23 deletions

View File

@ -43,12 +43,13 @@ def create_slur_regex() -> Pattern[str]:
# words that can have suffixes and prefixes
words = "|".join([slur.lower() for slur in SLURS.keys() if not slur.startswith(" ")])
regex = rf"(\s|>)({words})|({words})(\s|<)"
# to understand the weird groups see: https://www.regular-expressions.info/lookaround.html
regex = rf"(?<=\s|>)({words})|({words})(?=\s|<)"
# words that need to match exactly
single_words = "|".join([slur.strip().lower() for slur in SLURS.keys() if slur.startswith(" ")])
return re.compile(rf"(?i){regex}|(\s|>)({single_words})(\s|<)")
return re.compile(rf"(?i){regex}|(?<=\s|>)({single_words})(?=\s|<)")
def create_replace_map() -> Dict[str, str]:
@ -65,23 +66,13 @@ REPLACE_MAP = create_replace_map()
def sub_matcher(match: Match) -> str:
"""given a match returns the correct replacer string"""
# base regex: (?i)(\s|>)(words)|(words)(\s|<)|(\s|>)(words)(\s|<)
if match.group(2) is not None:
found = match.group(2)
elif match.group(3) is not None:
found = match.group(3)
else:
found = match.group(6)
# if it does not find the correct capitalization, it tries the all lower
replacer = REPLACE_MAP.get(found) or REPLACE_MAP.get(found.lower())
return (match.group(1) or match.group(5) or '') + replacer + (match.group(4) or match.group(7) or '')
found = match.group(0)
# if it does not find the correct capitalization, it tries the all lower, or return the original word
return REPLACE_MAP.get(found) or REPLACE_MAP.get(found.lower()) or found
def censor_slurs(body: str, logged_user) -> str:
"""Censors all the slurs in the body if the user is not logged in or if they have the slurreplacer active"""
"""Censors all the slurs in the body if the user is not logged-in or if they have the slurreplacer active"""
if not logged_user or logged_user.slurreplacer:
try:

View File

@ -52,7 +52,7 @@ def test_get_permutations_slur_wiht_link_replacer():
" retard ": "r-slur",
})
def test_create_slur_regex():
expected = r"(?i)(\s|>)(kill yourself|faggot)|(kill yourself|faggot)(\s|<)|(\s|>)(nig|retard)(\s|<)"
expected = r"(?i)(?<=\s|>)(kill yourself|faggot)|(kill yourself|faggot)(?=\s|<)|(?<=\s|>)(nig|retard)(?=\s|<)"
assert_that(create_slur_regex()).is_equal_to(re.compile(expected))
@ -91,22 +91,23 @@ def test_create_replace_map():
@patch("files.helpers.word_censor.REPLACE_MAP", {'retard': 'r-slur', 'Faggot': 'Cute twink', 'NIG': 'πŸ€'})
def test_sub_matcher():
regex = re.compile(r"(?i)(\s|>)(kill yourself|retard)|(kill yourself|retard)(\s|<)|(\s|>)(nig|faggot)(\s|<)")
regex = re.compile(
r"(?i)(?<=\s|>)(kill yourself|retard)|(kill yourself|retard)(?=\s|<)|(?<=\s|>)(nig|faggot)(?=\s|<)")
match = regex.search("<p>retard</p>")
assert_that(sub_matcher(match)).is_equal_to(">r-slur")
assert_that(sub_matcher(match)).is_equal_to("r-slur")
match = regex.search("<p>noretard</p>")
assert_that(sub_matcher(match)).is_equal_to("r-slur<")
assert_that(sub_matcher(match)).is_equal_to("r-slur")
match = regex.search("<p>ReTaRdEd</p>")
assert_that(sub_matcher(match)).is_equal_to(">r-slur")
assert_that(sub_matcher(match)).is_equal_to("r-slur")
match = regex.search("<p>NIG</p>")
assert_that(sub_matcher(match)).is_equal_to(">πŸ€<")
assert_that(sub_matcher(match)).is_equal_to("πŸ€")
match = regex.search("<p>Faggot </p>")
assert_that(sub_matcher(match)).is_equal_to(">Cute twink ")
assert_that(sub_matcher(match)).is_equal_to("Cute twink")
@patch("files.helpers.word_censor.SLURS", {
@ -163,6 +164,8 @@ def test_censor_slurs():
@patch("files.helpers.word_censor.SLURS", {'retard': 'r-slur', 'manlet': 'little king', ' nig ': 'πŸ€'})
def test_censor_slurs_does_not_error_out_on_exception():
word_censor.REPLACE_MAP = create_replace_map()
word_censor.SLUR_REGEX = create_slur_regex()
word_censor.REPLACE_MAP["manlet"] = None
word_censor.REPLACE_MAP["Manlet"] = None
assert_that(censor_slurs(">retarded SuperManlet NIG<", None)).is_equal_to(">r-slured SuperManlet πŸ€<")
@ -171,6 +174,7 @@ def test_censor_slurs_does_not_error_out_on_exception():
@patch("files.helpers.word_censor.SLURS", {'retard': 'r-slur', 'manlet': 'little king'})
def test_censor_slurs_does_not_censor_on_flag_disabled():
word_censor.REPLACE_MAP = create_replace_map()
word_censor.SLUR_REGEX = create_slur_regex()
class User:
def __init__(self, slurreplacer):