forked from rDrama/rDrama
Do the replacement in a single regex
parent
af8da42c73
commit
ff76a4d688
|
@ -1,7 +1,7 @@
|
|||
from collections import ChainMap
|
||||
import re
|
||||
from re import Match
|
||||
from typing import List, Dict
|
||||
from typing import Dict, Pattern
|
||||
|
||||
from files.helpers.const import SLURS
|
||||
|
||||
|
@ -39,6 +39,18 @@ def get_permutations_slur(slur: str, replacer: str = "_") -> Dict[str, str]:
|
|||
return result
|
||||
|
||||
|
||||
def create_slur_regex() -> Pattern[str]:
    """Compile a single case-insensitive regex that matches every key of SLURS.

    The pattern always has seven capture groups, in this order:
      1-2: delimiter (whitespace or ">") followed by an affixable word
      3-4: affixable word followed by a delimiter (whitespace or "<")
      5-7: delimiter, exact word, delimiter

    Keys that start with a space are treated as exact words; all other keys may
    appear with a prefix or a suffix attached.

    :return: the compiled pattern
    """
    # A negative lookahead with an empty body can never succeed. It stands in for an
    # empty word list so that the branch simply never matches, instead of collapsing
    # to an empty group that would match bare delimiters. The group count is unchanged.
    never_matches = "(?!)"

    # NOTE(review): keys are inserted verbatim (no re.escape), so a key containing
    # regex metacharacters is interpreted as regex syntax - confirm SLURS has none.

    # words that can have suffixes and prefixes
    affixable = [slur.lower() for slur in SLURS if not slur.startswith(" ")]
    words = "|".join(affixable) or never_matches

    # words that need to match exactly
    exact = [slur.strip().lower() for slur in SLURS if slur.startswith(" ")]
    single_words = "|".join(exact) or never_matches

    regex = rf"(\s|>)({words})|({words})(\s|<)"

    return re.compile(rf"(?i){regex}|(\s|>)({single_words})(\s|<)")
|
||||
|
||||
|
||||
def create_replace_map() -> Dict[str, str]:
|
||||
"""Creates the map that will be used to get the matching replacer for the given slur"""
|
||||
dicts = [get_permutations_slur(slur, replacer) for (slur, replacer) in SLURS.items()]
|
||||
|
@ -47,41 +59,34 @@ def create_replace_map() -> Dict[str, str]:
|
|||
return dict(ChainMap(*dicts))
|
||||
|
||||
|
||||
SLUR_REGEX = create_slur_regex()
|
||||
REPLACE_MAP = create_replace_map()
|
||||
|
||||
|
||||
def create_variations_slur_regex(slur: str) -> List[str]:
    """Build one regex source string for every permutation of the given slur.

    A slur that both starts and ends with a space gets the exact-word form
    (delimiter, word, delimiter). Any other slur gets the two-branch form that
    accepts a delimiter on either side of the word.

    :param slur: key as written in SLURS
    :return: list of regex source strings, one per permutation
    """
    variants = get_permutations_slur(slur)

    is_exact_word = slur.startswith(" ") and slur.endswith(" ")
    if is_exact_word:
        template = r"(\s|>)({0})(\s|<)"
    else:
        template = r"(\s|>)({0})|({0})(\s|<)"

    # iterating the mapping yields its keys, i.e. the permutations themselves
    return [template.format(variant) for variant in variants]
|
||||
|
||||
|
||||
def sub_matcher(match: Match) -> str:
    """given a match returns the correct replacer string

    The delimiters captured around the slur are kept, only the slur itself is
    replaced. When REPLACE_MAP has no entry for the matched text (neither as
    written nor lower-cased) the matched text is returned unchanged.

    :param match: a match produced by one of the slur patterns of this module
    :return: the text that should replace the whole match
    """
    # Group layouts handled here:
    #   3 groups: (\s|>)(word)(\s|<)
    #   4 groups: (\s|>)(words)|(words)(\s|<)
    #   7 groups: (?i)(\s|>)(words)|(words)(\s|<)|(\s|>)(words)(\s|<)
    groups = match.groups()

    if len(groups) == 3:
        # special case when it should match exact word
        prefix, found, suffix = groups
    else:
        # Pad the 4-group layout with None so it can be read like the 7-group one.
        padded = groups + (None,) * (7 - len(groups))
        found = next((padded[i] for i in (1, 2, 5) if padded[i] is not None), None)
        prefix = padded[0] or padded[4] or ''
        suffix = padded[3] or padded[6] or ''

    if found is None:
        return match.group(0)

    # if it does not find the correct capitalization, it tries the all lower
    replacer = REPLACE_MAP.get(found) or REPLACE_MAP.get(found.lower())
    if replacer is None:
        # no known replacement: leave the text as it was instead of failing on str + None
        return match.group(0)

    return prefix + replacer + suffix
|
||||
|
||||
|
||||
def censor_slurs(body: str, logged_user) -> str:
    """Censors all the slurs in the body if the user is not logged in or if they have the slurreplacer active

    :param body: text to censor
    :param logged_user: the current user, or a falsy value when nobody is logged in;
        only its ``slurreplacer`` attribute is read
    :return: the censored text, or ``body`` unchanged when censoring is off or fails
    """
    # A logged-in user who turned the slur replacer off gets the text as written.
    if logged_user and not logged_user.slurreplacer:
        return body

    try:
        # Single pass with the precompiled pattern; sub_matcher picks the replacer per match.
        body = SLUR_REGEX.sub(sub_matcher, body)
    except Exception as e:
        # Best effort: if the substitution raises, body keeps its previous value.
        print(e)

    return body
|
||||
|
|
|
@ -4,8 +4,8 @@ from unittest.mock import patch
|
|||
from assertpy import assert_that
|
||||
|
||||
from files.helpers import word_censor
|
||||
from files.helpers.word_censor import create_variations_slur_regex, create_replace_map, censor_slurs, sub_matcher, \
|
||||
get_permutations_slur, first_upper, first_all_upper
|
||||
from files.helpers.word_censor import create_replace_map, censor_slurs, sub_matcher, \
|
||||
get_permutations_slur, first_upper, first_all_upper, create_slur_regex
|
||||
|
||||
|
||||
def test_first_upper():
|
||||
|
@ -45,34 +45,16 @@ def test_get_permutations_slur_wiht_link_replacer():
|
|||
assert_that(result).is_equal_to(expected)
|
||||
|
||||
|
||||
def test_create_variations_slur_regex_for_slur_with_spaces():
|
||||
expected = [r"(\s|>)(retard)(\s|<)",
|
||||
r"(\s|>)(Retard)(\s|<)",
|
||||
r"(\s|>)(RETARD)(\s|<)"]
|
||||
@patch("files.helpers.word_censor.SLURS", {
|
||||
"kill yourself": "keep yourself safe",
|
||||
"faggot": "cute twink",
|
||||
" nig ": "🏀",
|
||||
" retard ": "r-slur",
|
||||
})
|
||||
def test_create_slur_regex():
|
||||
expected = r"(?i)(\s|>)(kill yourself|faggot)|(kill yourself|faggot)(\s|<)|(\s|>)(nig|retard)(\s|<)"
|
||||
|
||||
result = create_variations_slur_regex(" retard ")
|
||||
|
||||
assert_that(result).is_length(3).contains_only(*expected)
|
||||
|
||||
|
||||
def test_create_variations_slur_regex_single_word():
|
||||
expected = [r"(\s|>)(retard)|(retard)(\s|<)",
|
||||
r"(\s|>)(Retard)|(Retard)(\s|<)",
|
||||
r"(\s|>)(RETARD)|(RETARD)(\s|<)"]
|
||||
|
||||
result = create_variations_slur_regex("retard")
|
||||
|
||||
assert_that(result).is_length(3).contains_only(*expected)
|
||||
|
||||
|
||||
def test_create_variations_slur_regex_multiple_word():
|
||||
expected = [r"(\s|>)(kill yourself)|(kill yourself)(\s|<)",
|
||||
r"(\s|>)(Kill yourself)|(Kill yourself)(\s|<)",
|
||||
r"(\s|>)(Kill Yourself)|(Kill Yourself)(\s|<)",
|
||||
r"(\s|>)(KILL YOURSELF)|(KILL YOURSELF)(\s|<)"]
|
||||
result = create_variations_slur_regex("kill yourself")
|
||||
|
||||
assert_that(result).is_length(4).contains_only(*expected)
|
||||
assert_that(create_slur_regex()).is_equal_to(re.compile(expected))
|
||||
|
||||
|
||||
@patch("files.helpers.word_censor.SLURS", {
|
||||
|
@ -107,19 +89,24 @@ def test_create_replace_map():
|
|||
assert_that(result).is_equal_to(expected)
|
||||
|
||||
|
||||
@patch("files.helpers.word_censor.REPLACE_MAP", {'retard': 'r-slur', 'NIG': '🏀'})
|
||||
@patch("files.helpers.word_censor.REPLACE_MAP", {'retard': 'r-slur', 'Faggot': 'Cute twink', 'NIG': '🏀'})
|
||||
def test_sub_matcher():
|
||||
match = re.search(r"(\s|>)(retard)|(retard)(\s|<)", "<p>retard</p>")
|
||||
regex = re.compile(r"(?i)(\s|>)(kill yourself|retard)|(kill yourself|retard)(\s|<)|(\s|>)(nig|faggot)(\s|<)")
|
||||
|
||||
match = regex.search("<p>retard</p>")
|
||||
assert_that(sub_matcher(match)).is_equal_to(">r-slur")
|
||||
|
||||
match = re.search(r"(\s|>)(retard)|(retard)(\s|<)", "<p>noretard</p>")
|
||||
match = regex.search("<p>noretard</p>")
|
||||
assert_that(sub_matcher(match)).is_equal_to("r-slur<")
|
||||
|
||||
match = re.search(r"(\s|>)(NIG)(\s|<)", "<p>NIG</p>")
|
||||
match = regex.search("<p>ReTaRdEd</p>")
|
||||
assert_that(sub_matcher(match)).is_equal_to(">r-slur")
|
||||
|
||||
match = regex.search("<p>NIG</p>")
|
||||
assert_that(sub_matcher(match)).is_equal_to(">🏀<")
|
||||
|
||||
match = re.search(r"(\s|>)(NIG)(\s|<)", "<p>NIG </p>")
|
||||
assert_that(sub_matcher(match)).is_equal_to(">🏀 ")
|
||||
match = regex.search("<p>Faggot </p>")
|
||||
assert_that(sub_matcher(match)).is_equal_to(">Cute twink ")
|
||||
|
||||
|
||||
@patch("files.helpers.word_censor.SLURS", {
|
||||
|
@ -131,15 +118,16 @@ def test_sub_matcher():
|
|||
})
|
||||
def test_censor_slurs():
|
||||
word_censor.REPLACE_MAP = create_replace_map()
|
||||
word_censor.SLUR_REGEX = create_slur_regex()
|
||||
|
||||
assert_that(censor_slurs("<p>retard</p>", None)).is_equal_to("<p>r-slur</p>")
|
||||
assert_that(censor_slurs("<p>preretard</p>", None)).is_equal_to("<p>prer-slur</p>")
|
||||
assert_that(censor_slurs("that is Retarded like", None)).is_equal_to("that is R-slured like")
|
||||
assert_that(censor_slurs("that is SUPERRETARD like", None)).is_equal_to("that is SUPERR-SLUR like")
|
||||
assert_that(censor_slurs('... ReTaRd ...', None)).is_equal_to('... r-slur ...')
|
||||
assert_that(censor_slurs("<p>Manlets get out!</p>", None)).is_equal_to("<p>Little kings get out!</p>")
|
||||
|
||||
assert_that(censor_slurs('... "retard" ...', None)).is_equal_to('... "retard" ...')
|
||||
assert_that(censor_slurs('... ReTaRd ...', None)).is_equal_to('... ReTaRd ...')
|
||||
assert_that(censor_slurs('... xretardx ...', None)).is_equal_to('... xretardx ...')
|
||||
|
||||
assert_that(censor_slurs("LLM is a manlet hehe", None)).is_equal_to("LLM is a little king hehe")
|
||||
|
@ -155,11 +143,9 @@ def test_censor_slurs():
|
|||
assert_that(censor_slurs('... I hate Carp ...', None)).is_equal_to('... I love Carp ...')
|
||||
assert_that(censor_slurs('... I Hate Carp ...', None)).is_equal_to('... I Love Carp ...')
|
||||
assert_that(censor_slurs('... I HATE CARP ...', None)).is_equal_to('... I LOVE CARP ...')
|
||||
|
||||
# Not covered:
|
||||
assert_that(censor_slurs('... I Hate carp ...', None)).is_equal_to('... I Hate carp ...')
|
||||
assert_that(censor_slurs('... i Hate Carp ...', None)).is_equal_to('... i Hate Carp ...')
|
||||
assert_that(censor_slurs('... i Hate carp ...', None)).is_equal_to('... i Hate carp ...')
|
||||
assert_that(censor_slurs('... I Hate carp ...', None)).is_equal_to('... i love Carp ...')
|
||||
assert_that(censor_slurs('... i Hate Carp ...', None)).is_equal_to('... i love Carp ...')
|
||||
assert_that(censor_slurs('... i Hate carp ...', None)).is_equal_to('... i love Carp ...')
|
||||
|
||||
assert_that(censor_slurs('... i hate a carp ...', None)).is_equal_to('... i hate a carp ...')
|
||||
|
||||
|
|
Loading…
Reference in New Issue