forked from rDrama/rDrama
1
0
Fork 0

Do the replacement in a single regex

master
Yo Mama 2021-10-19 00:24:15 +02:00
parent af8da42c73
commit ff76a4d688
2 changed files with 60 additions and 69 deletions

View File

@ -1,7 +1,7 @@
from collections import ChainMap
import re
from re import Match
from typing import List, Dict
from typing import Dict, Pattern
from files.helpers.const import SLURS
@ -39,6 +39,18 @@ def get_permutations_slur(slur: str, replacer: str = "_") -> Dict[str, str]:
return result
def create_slur_regex() -> Pattern[str]:
    r"""Compile the single case-insensitive regex that finds every slur in ``SLURS``.

    Keys of ``SLURS`` that start with a space are exact words: they only match
    when delimited on both sides (whitespace or ``>`` before, whitespace or
    ``<`` after). All other keys match when delimited on at least one side, so
    they may carry a prefix or a suffix.

    The pattern always has the layout
    ``(?i)(\s|>)(words)|(words)(\s|<)|(\s|>)(single_words)(\s|<)``
    with seven capture groups, whatever ``SLURS`` contains.

    Returns:
        The compiled pattern.
    """
    # An empty alternation would produce "()", which matches the empty string
    # and makes the regex fire on bare delimiters. "(?!)" can never match and
    # keeps the group numbering intact.
    never_matches = "(?!)"

    # words that can have suffixes and prefixes
    words = "|".join(
        slur.lower() for slur in SLURS if not slur.startswith(" ")
    ) or never_matches
    regex = rf"(\s|>)({words})|({words})(\s|<)"

    # words that need to match exactly
    single_words = "|".join(
        slur.strip().lower() for slur in SLURS if slur.startswith(" ")
    ) or never_matches

    return re.compile(rf"(?i){regex}|(\s|>)({single_words})(\s|<)")
def create_replace_map() -> Dict[str, str]:
"""Creates the map that will be used to get the matching replacer for the given slur"""
dicts = [get_permutations_slur(slur, replacer) for (slur, replacer) in SLURS.items()]
@ -47,41 +59,34 @@ def create_replace_map() -> Dict[str, str]:
return dict(ChainMap(*dicts))
SLUR_REGEX = create_slur_regex()
REPLACE_MAP = create_replace_map()
def create_variations_slur_regex(slur: str) -> List[str]:
    r"""Build one regex pattern string for each key of ``get_permutations_slur(slur)``.

    A slur that both starts and ends with a space gets patterns that require a
    delimiter on both sides (``\s`` or ``>`` before, ``\s`` or ``<`` after).
    Any other slur gets patterns that require a delimiter on one side only.

    Returns:
        The list of pattern strings, in the iteration order of the permutations.
    """
    variants = get_permutations_slur(slur)
    padded_both_sides = slur.startswith(" ") and slur.endswith(" ")

    patterns: List[str] = []
    for variant in variants.keys():
        if padded_both_sides:
            patterns.append(rf"(\s|>)({variant})(\s|<)")
        else:
            patterns.append(rf"(\s|>)({variant})|({variant})(\s|<)")
    return patterns
def sub_matcher(match: Match) -> str:
# NOTE(review): this span looks like two versions of the body fused together
# (one that branches on len(match.groups()), one that reads groups 1-7 of the
# combined regex), with diff markers and indentation lost. Confirm which lines
# are live before editing.
# special case when it should match exact word
if len(match.groups()) == 3:
found = match.group(2)
replacer = REPLACE_MAP[found]
return match.group(1) + replacer + match.group(3)
"""given a match returns the correct replacer string"""
else: # normal case with prefix or suffix
found = match.group(2) if (match.group(2) is not None) else match.group(3)
replacer = REPLACE_MAP[found]
return (match.group(1) or '') + replacer + (match.group(4) or '')
# base regex: (?i)(\s|>)(words)|(words)(\s|<)|(\s|>)(words)(\s|<)
# The matched word is captured by group 2, 3 or 6, depending on which of the
# three alternatives of the base regex fired; take the one that is set.
if match.group(2) is not None:
found = match.group(2)
elif match.group(3) is not None:
found = match.group(3)
else:
found = match.group(6)
# if it does not find the correct capitalization, it tries the all lower
replacer = REPLACE_MAP.get(found) or REPLACE_MAP.get(found.lower())
# Put back whichever delimiter was captured: group 1 or 5 in front of the
# replacement, group 4 or 7 behind it (empty string when none was captured).
return (match.group(1) or match.group(5) or '') + replacer + (match.group(4) or match.group(7) or '')
def censor_slurs(body: str, logged_user) -> str:
# NOTE(review): this span looks like two versions of the body fused together
# (a per-slur loop calling re.sub for every variation, and a single
# SLUR_REGEX.sub call), with diff markers and indentation lost. Confirm which
# lines are live before editing.
if logged_user and not logged_user.slurreplacer:
return body
"""Censors all the slurs in the body if the user is not logged in or if they have the slurreplacer active"""
for (slur, replace) in SLURS.items():
for variation in create_variations_slur_regex(slur):
try:
body = re.sub(variation, sub_matcher, body)
except Exception as e:
print(e)
# Censor when there is no logged-in user, or when the user has slurreplacer set.
if not logged_user or logged_user.slurreplacer:
try:
# A single substitution pass; sub_matcher supplies the replacement for each match.
body = SLUR_REGEX.sub(sub_matcher, body)
except Exception as e:
# Best effort: the error is printed and body keeps the value it had before the failed call.
print(e)
return body

View File

@ -4,8 +4,8 @@ from unittest.mock import patch
from assertpy import assert_that
from files.helpers import word_censor
from files.helpers.word_censor import create_variations_slur_regex, create_replace_map, censor_slurs, sub_matcher, \
get_permutations_slur, first_upper, first_all_upper
from files.helpers.word_censor import create_replace_map, censor_slurs, sub_matcher, \
get_permutations_slur, first_upper, first_all_upper, create_slur_regex
def test_first_upper():
@ -45,34 +45,16 @@ def test_get_permutations_slur_wiht_link_replacer():
assert_that(result).is_equal_to(expected)
def test_create_variations_slur_regex_for_slur_with_spaces():
expected = [r"(\s|>)(retard)(\s|<)",
r"(\s|>)(Retard)(\s|<)",
r"(\s|>)(RETARD)(\s|<)"]
@patch("files.helpers.word_censor.SLURS", {
"kill yourself": "keep yourself safe",
"faggot": "cute twink",
" nig ": "🏀",
" retard ": "r-slur",
})
def test_create_slur_regex():
expected = r"(?i)(\s|>)(kill yourself|faggot)|(kill yourself|faggot)(\s|<)|(\s|>)(nig|retard)(\s|<)"
result = create_variations_slur_regex(" retard ")
assert_that(result).is_length(3).contains_only(*expected)
def test_create_variations_slur_regex_single_word():
expected = [r"(\s|>)(retard)|(retard)(\s|<)",
r"(\s|>)(Retard)|(Retard)(\s|<)",
r"(\s|>)(RETARD)|(RETARD)(\s|<)"]
result = create_variations_slur_regex("retard")
assert_that(result).is_length(3).contains_only(*expected)
def test_create_variations_slur_regex_multiple_word():
expected = [r"(\s|>)(kill yourself)|(kill yourself)(\s|<)",
r"(\s|>)(Kill yourself)|(Kill yourself)(\s|<)",
r"(\s|>)(Kill Yourself)|(Kill Yourself)(\s|<)",
r"(\s|>)(KILL YOURSELF)|(KILL YOURSELF)(\s|<)"]
result = create_variations_slur_regex("kill yourself")
assert_that(result).is_length(4).contains_only(*expected)
assert_that(create_slur_regex()).is_equal_to(re.compile(expected))
@patch("files.helpers.word_censor.SLURS", {
@ -107,19 +89,24 @@ def test_create_replace_map():
assert_that(result).is_equal_to(expected)
@patch("files.helpers.word_censor.REPLACE_MAP", {'retard': 'r-slur', 'NIG': '🏀'})
@patch("files.helpers.word_censor.REPLACE_MAP", {'retard': 'r-slur', 'Faggot': 'Cute twink', 'NIG': '🏀'})
def test_sub_matcher():
match = re.search(r"(\s|>)(retard)|(retard)(\s|<)", "<p>retard</p>")
regex = re.compile(r"(?i)(\s|>)(kill yourself|retard)|(kill yourself|retard)(\s|<)|(\s|>)(nig|faggot)(\s|<)")
match = regex.search("<p>retard</p>")
assert_that(sub_matcher(match)).is_equal_to(">r-slur")
match = re.search(r"(\s|>)(retard)|(retard)(\s|<)", "<p>noretard</p>")
match = regex.search("<p>noretard</p>")
assert_that(sub_matcher(match)).is_equal_to("r-slur<")
match = re.search(r"(\s|>)(NIG)(\s|<)", "<p>NIG</p>")
match = regex.search("<p>ReTaRdEd</p>")
assert_that(sub_matcher(match)).is_equal_to(">r-slur")
match = regex.search("<p>NIG</p>")
assert_that(sub_matcher(match)).is_equal_to(">🏀<")
match = re.search(r"(\s|>)(NIG)(\s|<)", "<p>NIG </p>")
assert_that(sub_matcher(match)).is_equal_to(">🏀 ")
match = regex.search("<p>Faggot </p>")
assert_that(sub_matcher(match)).is_equal_to(">Cute twink ")
@patch("files.helpers.word_censor.SLURS", {
@ -131,15 +118,16 @@ def test_sub_matcher():
})
def test_censor_slurs():
word_censor.REPLACE_MAP = create_replace_map()
word_censor.SLUR_REGEX = create_slur_regex()
assert_that(censor_slurs("<p>retard</p>", None)).is_equal_to("<p>r-slur</p>")
assert_that(censor_slurs("<p>preretard</p>", None)).is_equal_to("<p>prer-slur</p>")
assert_that(censor_slurs("that is Retarded like", None)).is_equal_to("that is R-slured like")
assert_that(censor_slurs("that is SUPERRETARD like", None)).is_equal_to("that is SUPERR-SLUR like")
assert_that(censor_slurs('... ReTaRd ...', None)).is_equal_to('... r-slur ...')
assert_that(censor_slurs("<p>Manlets get out!</p>", None)).is_equal_to("<p>Little kings get out!</p>")
assert_that(censor_slurs('... "retard" ...', None)).is_equal_to('... "retard" ...')
assert_that(censor_slurs('... ReTaRd ...', None)).is_equal_to('... ReTaRd ...')
assert_that(censor_slurs('... xretardx ...', None)).is_equal_to('... xretardx ...')
assert_that(censor_slurs("LLM is a manlet hehe", None)).is_equal_to("LLM is a little king hehe")
@ -155,11 +143,9 @@ def test_censor_slurs():
assert_that(censor_slurs('... I hate Carp ...', None)).is_equal_to('... I love Carp ...')
assert_that(censor_slurs('... I Hate Carp ...', None)).is_equal_to('... I Love Carp ...')
assert_that(censor_slurs('... I HATE CARP ...', None)).is_equal_to('... I LOVE CARP ...')
# Not covered:
assert_that(censor_slurs('... I Hate carp ...', None)).is_equal_to('... I Hate carp ...')
assert_that(censor_slurs('... i Hate Carp ...', None)).is_equal_to('... i Hate Carp ...')
assert_that(censor_slurs('... i Hate carp ...', None)).is_equal_to('... i Hate carp ...')
assert_that(censor_slurs('... I Hate carp ...', None)).is_equal_to('... i love Carp ...')
assert_that(censor_slurs('... i Hate Carp ...', None)).is_equal_to('... i love Carp ...')
assert_that(censor_slurs('... i Hate carp ...', None)).is_equal_to('... i love Carp ...')
assert_that(censor_slurs('... i hate a carp ...', None)).is_equal_to('... i hate a carp ...')