forked from rDrama/rDrama
1
0
Fork 0

Do the replacement in a single regex

master
Yo Mama 2021-10-19 00:24:15 +02:00
parent af8da42c73
commit ff76a4d688
2 changed files with 60 additions and 69 deletions

View File

@ -1,7 +1,7 @@
from collections import ChainMap from collections import ChainMap
import re import re
from re import Match from re import Match
from typing import List, Dict from typing import Dict, Pattern
from files.helpers.const import SLURS from files.helpers.const import SLURS
@ -39,6 +39,18 @@ def get_permutations_slur(slur: str, replacer: str = "_") -> Dict[str, str]:
return result return result
def create_slur_regex() -> Pattern[str]:
    """Compile the single case-insensitive regex used to find every slur.

    The keys of ``SLURS`` are split in two categories:

    * keys that do not start with a space may carry a prefix or a suffix, so
      they only need a delimiter (whitespace or a tag boundary) on one side;
    * keys that start with a space must match as a whole word, so they need a
      delimiter on both sides.

    The compiled pattern always has this shape, with exactly seven groups::

        (?i)(\\s|>)(words)|(words)(\\s|<)|(\\s|>)(single_words)(\\s|<)

    Returns:
        The compiled pattern. Group positions are kept stable even when one
        of the two categories is empty, so code that reads groups 1-7 by
        index keeps working.
    """
    # An empty negative lookahead can never match. It stands in for an empty
    # category: without it the alternation would be "()", which matches the
    # empty string and turns e.g. (\s|>)()(\s|<) into "any two adjacent
    # delimiters", yielding matches whose captured word is "".
    never_matches = r"(?!)"

    # words that can have suffixes and prefixes
    # (blank keys are dropped: they would add an empty alternative)
    loose = [slur.lower() for slur in SLURS if slur and not slur.startswith(" ")]
    # words that need to match exactly
    exact = [slur.strip().lower() for slur in SLURS if slur.startswith(" ") and slur.strip()]

    # NOTE(review): the words are interpolated without re.escape, so a key
    # containing regex metacharacters is interpreted as regex syntax.
    words = "|".join(loose) or never_matches
    single_words = "|".join(exact) or never_matches

    return re.compile(rf"(?i)(\s|>)({words})|({words})(\s|<)|(\s|>)({single_words})(\s|<)")
def create_replace_map() -> Dict[str, str]: def create_replace_map() -> Dict[str, str]:
"""Creates the map that will be used to get the mathing replaced for the given slur""" """Creates the map that will be used to get the mathing replaced for the given slur"""
dicts = [get_permutations_slur(slur, replacer) for (slur, replacer) in SLURS.items()] dicts = [get_permutations_slur(slur, replacer) for (slur, replacer) in SLURS.items()]
@ -47,40 +59,33 @@ def create_replace_map() -> Dict[str, str]:
return dict(ChainMap(*dicts)) return dict(ChainMap(*dicts))
SLUR_REGEX = create_slur_regex()
REPLACE_MAP = create_replace_map() REPLACE_MAP = create_replace_map()
def create_variations_slur_regex(slur: str) -> List[str]:
    """Return one regex source string per key of ``get_permutations_slur(slur)``.

    A slur written with both a leading and a trailing space must match as a
    whole word, so its patterns require a delimiter (whitespace or a tag
    boundary) on both sides. Any other slur may carry a prefix or a suffix,
    so its patterns accept a delimiter on either side.
    """
    variants = get_permutations_slur(slur)
    whole_word_only = slur.startswith(" ") and slur.endswith(" ")

    patterns = []
    for variant in variants:
        if whole_word_only:
            patterns.append(rf"(\s|>)({variant})(\s|<)")
        else:
            patterns.append(rf"(\s|>)({variant})|({variant})(\s|<)")
    return patterns
def sub_matcher(match: Match) -> str: def sub_matcher(match: Match) -> str:
# special case when it should match exact word """given a match returns the correct replacer string"""
if len(match.groups()) == 3:
found = match.group(2)
replacer = REPLACE_MAP[found]
return match.group(1) + replacer + match.group(3)
else: # normal case with prefix or suffix # base regex: (?i)(\s|>)(words)|(words)(\s|<)|(\s|>)(words)(\s|<)
found = match.group(2) if (match.group(2) is not None) else match.group(3) if match.group(2) is not None:
replacer = REPLACE_MAP[found] found = match.group(2)
return (match.group(1) or '') + replacer + (match.group(4) or '') elif match.group(3) is not None:
found = match.group(3)
else:
found = match.group(6)
# if it does not find the correct capitalization, it tries the all lower
replacer = REPLACE_MAP.get(found) or REPLACE_MAP.get(found.lower())
return (match.group(1) or match.group(5) or '') + replacer + (match.group(4) or match.group(7) or '')
def censor_slurs(body: str, logged_user) -> str: def censor_slurs(body: str, logged_user) -> str:
if logged_user and not logged_user.slurreplacer: """Censors all the slurs in the body if the user is not logged in or if they have the slurreplacer active"""
return body
for (slur, replace) in SLURS.items(): if not logged_user or logged_user.slurreplacer:
for variation in create_variations_slur_regex(slur):
try: try:
body = re.sub(variation, sub_matcher, body) body = SLUR_REGEX.sub(sub_matcher, body)
except Exception as e: except Exception as e:
print(e) print(e)

View File

@ -4,8 +4,8 @@ from unittest.mock import patch
from assertpy import assert_that from assertpy import assert_that
from files.helpers import word_censor from files.helpers import word_censor
from files.helpers.word_censor import create_variations_slur_regex, create_replace_map, censor_slurs, sub_matcher, \ from files.helpers.word_censor import create_replace_map, censor_slurs, sub_matcher, \
get_permutations_slur, first_upper, first_all_upper get_permutations_slur, first_upper, first_all_upper, create_slur_regex
def test_first_upper(): def test_first_upper():
@ -45,34 +45,16 @@ def test_get_permutations_slur_wiht_link_replacer():
assert_that(result).is_equal_to(expected) assert_that(result).is_equal_to(expected)
def test_create_variations_slur_regex_for_slur_with_spaces(): @patch("files.helpers.word_censor.SLURS", {
expected = [r"(\s|>)(retard)(\s|<)", "kill yourself": "keep yourself safe",
r"(\s|>)(Retard)(\s|<)", "faggot": "cute twink",
r"(\s|>)(RETARD)(\s|<)"] " nig ": "🏀",
" retard ": "r-slur",
})
def test_create_slur_regex():
expected = r"(?i)(\s|>)(kill yourself|faggot)|(kill yourself|faggot)(\s|<)|(\s|>)(nig|retard)(\s|<)"
result = create_variations_slur_regex(" retard ") assert_that(create_slur_regex()).is_equal_to(re.compile(expected))
assert_that(result).is_length(3).contains_only(*expected)
def test_create_variations_slur_regex_single_word():
expected = [r"(\s|>)(retard)|(retard)(\s|<)",
r"(\s|>)(Retard)|(Retard)(\s|<)",
r"(\s|>)(RETARD)|(RETARD)(\s|<)"]
result = create_variations_slur_regex("retard")
assert_that(result).is_length(3).contains_only(*expected)
def test_create_variations_slur_regex_multiple_word():
expected = [r"(\s|>)(kill yourself)|(kill yourself)(\s|<)",
r"(\s|>)(Kill yourself)|(Kill yourself)(\s|<)",
r"(\s|>)(Kill Yourself)|(Kill Yourself)(\s|<)",
r"(\s|>)(KILL YOURSELF)|(KILL YOURSELF)(\s|<)"]
result = create_variations_slur_regex("kill yourself")
assert_that(result).is_length(4).contains_only(*expected)
@patch("files.helpers.word_censor.SLURS", { @patch("files.helpers.word_censor.SLURS", {
@ -107,19 +89,24 @@ def test_create_replace_map():
assert_that(result).is_equal_to(expected) assert_that(result).is_equal_to(expected)
@patch("files.helpers.word_censor.REPLACE_MAP", {'retard': 'r-slur', 'NIG': '🏀'}) @patch("files.helpers.word_censor.REPLACE_MAP", {'retard': 'r-slur', 'Faggot': 'Cute twink', 'NIG': '🏀'})
def test_sub_matcher(): def test_sub_matcher():
match = re.search(r"(\s|>)(retard)|(retard)(\s|<)", "<p>retard</p>") regex = re.compile(r"(?i)(\s|>)(kill yourself|retard)|(kill yourself|retard)(\s|<)|(\s|>)(nig|faggot)(\s|<)")
match = regex.search("<p>retard</p>")
assert_that(sub_matcher(match)).is_equal_to(">r-slur") assert_that(sub_matcher(match)).is_equal_to(">r-slur")
match = re.search(r"(\s|>)(retard)|(retard)(\s|<)", "<p>noretard</p>") match = regex.search("<p>noretard</p>")
assert_that(sub_matcher(match)).is_equal_to("r-slur<") assert_that(sub_matcher(match)).is_equal_to("r-slur<")
match = re.search(r"(\s|>)(NIG)(\s|<)", "<p>NIG</p>") match = regex.search("<p>ReTaRdEd</p>")
assert_that(sub_matcher(match)).is_equal_to(">r-slur")
match = regex.search("<p>NIG</p>")
assert_that(sub_matcher(match)).is_equal_to(">🏀<") assert_that(sub_matcher(match)).is_equal_to(">🏀<")
match = re.search(r"(\s|>)(NIG)(\s|<)", "<p>NIG </p>") match = regex.search("<p>Faggot </p>")
assert_that(sub_matcher(match)).is_equal_to(">🏀 ") assert_that(sub_matcher(match)).is_equal_to(">Cute twink ")
@patch("files.helpers.word_censor.SLURS", { @patch("files.helpers.word_censor.SLURS", {
@ -131,15 +118,16 @@ def test_sub_matcher():
}) })
def test_censor_slurs(): def test_censor_slurs():
word_censor.REPLACE_MAP = create_replace_map() word_censor.REPLACE_MAP = create_replace_map()
word_censor.SLUR_REGEX = create_slur_regex()
assert_that(censor_slurs("<p>retard</p>", None)).is_equal_to("<p>r-slur</p>") assert_that(censor_slurs("<p>retard</p>", None)).is_equal_to("<p>r-slur</p>")
assert_that(censor_slurs("<p>preretard</p>", None)).is_equal_to("<p>prer-slur</p>") assert_that(censor_slurs("<p>preretard</p>", None)).is_equal_to("<p>prer-slur</p>")
assert_that(censor_slurs("that is Retarded like", None)).is_equal_to("that is R-slured like") assert_that(censor_slurs("that is Retarded like", None)).is_equal_to("that is R-slured like")
assert_that(censor_slurs("that is SUPERRETARD like", None)).is_equal_to("that is SUPERR-SLUR like") assert_that(censor_slurs("that is SUPERRETARD like", None)).is_equal_to("that is SUPERR-SLUR like")
assert_that(censor_slurs('... ReTaRd ...', None)).is_equal_to('... r-slur ...')
assert_that(censor_slurs("<p>Manlets get out!</p>", None)).is_equal_to("<p>Little kings get out!</p>") assert_that(censor_slurs("<p>Manlets get out!</p>", None)).is_equal_to("<p>Little kings get out!</p>")
assert_that(censor_slurs('... "retard" ...', None)).is_equal_to('... "retard" ...') assert_that(censor_slurs('... "retard" ...', None)).is_equal_to('... "retard" ...')
assert_that(censor_slurs('... ReTaRd ...', None)).is_equal_to('... ReTaRd ...')
assert_that(censor_slurs('... xretardx ...', None)).is_equal_to('... xretardx ...') assert_that(censor_slurs('... xretardx ...', None)).is_equal_to('... xretardx ...')
assert_that(censor_slurs("LLM is a manlet hehe", None)).is_equal_to("LLM is a little king hehe") assert_that(censor_slurs("LLM is a manlet hehe", None)).is_equal_to("LLM is a little king hehe")
@ -155,11 +143,9 @@ def test_censor_slurs():
assert_that(censor_slurs('... I hate Carp ...', None)).is_equal_to('... I love Carp ...') assert_that(censor_slurs('... I hate Carp ...', None)).is_equal_to('... I love Carp ...')
assert_that(censor_slurs('... I Hate Carp ...', None)).is_equal_to('... I Love Carp ...') assert_that(censor_slurs('... I Hate Carp ...', None)).is_equal_to('... I Love Carp ...')
assert_that(censor_slurs('... I HATE CARP ...', None)).is_equal_to('... I LOVE CARP ...') assert_that(censor_slurs('... I HATE CARP ...', None)).is_equal_to('... I LOVE CARP ...')
assert_that(censor_slurs('... I Hate carp ...', None)).is_equal_to('... i love Carp ...')
# Not covered: assert_that(censor_slurs('... i Hate Carp ...', None)).is_equal_to('... i love Carp ...')
assert_that(censor_slurs('... I Hate carp ...', None)).is_equal_to('... I Hate carp ...') assert_that(censor_slurs('... i Hate carp ...', None)).is_equal_to('... i love Carp ...')
assert_that(censor_slurs('... i Hate Carp ...', None)).is_equal_to('... i Hate Carp ...')
assert_that(censor_slurs('... i Hate carp ...', None)).is_equal_to('... i Hate carp ...')
assert_that(censor_slurs('... i hate a carp ...', None)).is_equal_to('... i hate a carp ...') assert_that(censor_slurs('... i hate a carp ...', None)).is_equal_to('... i hate a carp ...')