Do the replacement in a single regex

2021-10-19 00:24:15 +02:00 · 2021-10-19 00:24:15 +02:00 · ff76a4d688
parent af8da42c73
commit ff76a4d688
2 changed files with 60 additions and 69 deletions
--- a/files/helpers/word_censor.py
+++ b/files/helpers/word_censor.py
@ -1,7 +1,7 @@
 from collections import ChainMap
 import re
 from re import Match
-from typing import List, Dict
+from typing import Dict, Pattern
 from files.helpers.const import SLURS
@ -39,6 +39,18 @@ def get_permutations_slur(slur: str, replacer: str = "_") -> Dict[str, str]:
    return result
 def create_slur_regex() -> Pattern[str]:
    """Compile the single case-insensitive pattern that matches every key of SLURS.

    Keys that start with a space are exact words: they only match when they are
    delimited on both sides (whitespace or ">" before, whitespace or "<" after).
    Every other key may carry a prefix or a suffix, so a delimiter on one side
    is enough.

    The pattern always contains seven capture groups, in this order:
    groups 1-2 are delimiter + word, groups 3-4 are word + delimiter and
    groups 5-7 are delimiter + exact word + delimiter.

    Returns:
        The compiled pattern built from the lower-cased keys of SLURS.
    """
    # "(?!)" is an empty negative lookahead and can never match. It stands in
    # for an empty word list: joining nothing would otherwise produce "()",
    # which matches the empty string and turns bare delimiters into matches.
    # It is not a capture group, so the group numbering stays the same.
    never_matches = "(?!)"
    # words that can have suffixes and prefixes
    words = "|".join(slur.lower() for slur in SLURS if not slur.startswith(" ")) or never_matches
    # words that need to match exactly
    single_words = "|".join(slur.strip().lower() for slur in SLURS if slur.startswith(" ")) or never_matches
    regex = rf"(\s|>)({words})|({words})(\s|<)"
    return re.compile(rf"(?i){regex}|(\s|>)({single_words})(\s|<)")
 def create_replace_map() -> Dict[str, str]:
    """Creates the map that will be used to get the mathing replaced for the given slur"""
    dicts = [get_permutations_slur(slur, replacer) for (slur, replacer) in SLURS.items()]
@ -47,40 +59,33 @@ def create_replace_map() -> Dict[str, str]:
    return dict(ChainMap(*dicts))
 SLUR_REGEX = create_slur_regex()
 REPLACE_MAP = create_replace_map()
 def create_variations_slur_regex(slur: str) -> List[str]:
    """Return one regex source string per permutation of the given slur.

    The permutations are the keys returned by get_permutations_slur(slur).
    A slur written with a leading and a trailing space must be delimited on
    both sides; any other slur only needs a delimiter before or after it.
    """
    exact_word = slur.startswith(" ") and slur.endswith(" ")
    # {0} is the slot for the permutation; the patterns contain no other braces
    template = r"(\s|>)({0})(\s|<)" if exact_word else r"(\s|>)({0})|({0})(\s|<)"
    return [template.format(variant) for variant in get_permutations_slur(slur)]
 def sub_matcher(match: Match) -> str:
-    # special case when it should match exact word
+    """given a match returns the correct replacer string"""
    if len(match.groups()) == 3:
        found = match.group(2)
        replacer = REPLACE_MAP[found]
        return match.group(1) + replacer + match.group(3)
-    else:  # normal case with prefix or suffix
+    # base regex: (?i)(\s|>)(words)|(words)(\s|<)|(\s|>)(words)(\s|<)
-        found = match.group(2) if (match.group(2) is not None) else match.group(3)
+    if match.group(2) is not None:
-        replacer = REPLACE_MAP[found]
+        found = match.group(2)
-        return (match.group(1) or '') + replacer + (match.group(4) or '')
+    elif match.group(3) is not None:
        found = match.group(3)
    else:
        found = match.group(6)
    # if it does not find the correct capitalization, it tries the all lower
    replacer = REPLACE_MAP.get(found) or REPLACE_MAP.get(found.lower())
    return (match.group(1) or match.group(5) or '') + replacer + (match.group(4) or match.group(7) or '')
 def censor_slurs(body: str, logged_user) -> str:
-    if logged_user and not logged_user.slurreplacer:
+    """Censors all the slurs in the body if the user is not logged in or if they have the slurreplacer active"""
        return body
-    for (slur, replace) in SLURS.items():
+    if not logged_user or logged_user.slurreplacer:
        for variation in create_variations_slur_regex(slur):
        try:
-                body = re.sub(variation, sub_matcher, body)
+            body = SLUR_REGEX.sub(sub_matcher, body)
        except Exception as e:
            print(e)
--- a/test/files/helpers/test_word_censor.py
+++ b/test/files/helpers/test_word_censor.py
@ -4,8 +4,8 @@ from unittest.mock import patch
 from assertpy import assert_that
 from files.helpers import word_censor
-from files.helpers.word_censor import create_variations_slur_regex, create_replace_map, censor_slurs, sub_matcher, \
+from files.helpers.word_censor import create_replace_map, censor_slurs, sub_matcher, \
-    get_permutations_slur, first_upper, first_all_upper
+    get_permutations_slur, first_upper, first_all_upper, create_slur_regex
 def test_first_upper():
@ -45,34 +45,16 @@ def test_get_permutations_slur_wiht_link_replacer():
    assert_that(result).is_equal_to(expected)
-def test_create_variations_slur_regex_for_slur_with_spaces():
+@patch("files.helpers.word_censor.SLURS", {
-    expected = [r"(\s|>)(retard)(\s|<)",
+    "kill yourself": "keep yourself safe",
-                r"(\s|>)(Retard)(\s|<)",
+    "faggot": "cute twink",
-                r"(\s|>)(RETARD)(\s|<)"]
+    " nig ": "🏀",
    " retard ": "r-slur",
 })
 def test_create_slur_regex():
    expected = r"(?i)(\s|>)(kill yourself|faggot)|(kill yourself|faggot)(\s|<)|(\s|>)(nig|retard)(\s|<)"
-    result = create_variations_slur_regex(" retard ")
+    assert_that(create_slur_regex()).is_equal_to(re.compile(expected))
    assert_that(result).is_length(3).contains_only(*expected)
 def test_create_variations_slur_regex_single_word():
    """A slur without surrounding spaces yields three prefix-or-suffix patterns."""
    variations = create_variations_slur_regex("retard")
    wanted = [
        r"(\s|>)(retard)|(retard)(\s|<)",
        r"(\s|>)(Retard)|(Retard)(\s|<)",
        r"(\s|>)(RETARD)|(RETARD)(\s|<)",
    ]
    assert_that(variations).is_length(3).contains_only(*wanted)
 def test_create_variations_slur_regex_multiple_word():
    """A two-word slur yields four prefix-or-suffix patterns, one per capitalization."""
    variations = create_variations_slur_regex("kill yourself")
    wanted = [
        r"(\s|>)(kill yourself)|(kill yourself)(\s|<)",
        r"(\s|>)(Kill yourself)|(Kill yourself)(\s|<)",
        r"(\s|>)(Kill Yourself)|(Kill Yourself)(\s|<)",
        r"(\s|>)(KILL YOURSELF)|(KILL YOURSELF)(\s|<)",
    ]
    assert_that(variations).is_length(4).contains_only(*wanted)
@patch("files.helpers.word_censor.SLURS", {
@ -107,19 +89,24 @@ def test_create_replace_map():
    assert_that(result).is_equal_to(expected)
-@patch("files.helpers.word_censor.REPLACE_MAP", {'retard': 'r-slur', 'NIG': '🏀'})
+@patch("files.helpers.word_censor.REPLACE_MAP", {'retard': 'r-slur', 'Faggot': 'Cute twink', 'NIG': '🏀'})
 def test_sub_matcher():
-    match = re.search(r"(\s|>)(retard)|(retard)(\s|<)", "<p>retard</p>")
+    regex = re.compile(r"(?i)(\s|>)(kill yourself|retard)|(kill yourself|retard)(\s|<)|(\s|>)(nig|faggot)(\s|<)")
    match = regex.search("<p>retard</p>")
    assert_that(sub_matcher(match)).is_equal_to(">r-slur")
-    match = re.search(r"(\s|>)(retard)|(retard)(\s|<)", "<p>noretard</p>")
+    match = regex.search("<p>noretard</p>")
    assert_that(sub_matcher(match)).is_equal_to("r-slur<")
-    match = re.search(r"(\s|>)(NIG)(\s|<)", "<p>NIG</p>")
+    match = regex.search("<p>ReTaRdEd</p>")
    assert_that(sub_matcher(match)).is_equal_to(">r-slur")
    match = regex.search("<p>NIG</p>")
    assert_that(sub_matcher(match)).is_equal_to(">🏀<")
-    match = re.search(r"(\s|>)(NIG)(\s|<)", "<p>NIG </p>")
+    match = regex.search("<p>Faggot </p>")
-    assert_that(sub_matcher(match)).is_equal_to(">🏀 ")
+    assert_that(sub_matcher(match)).is_equal_to(">Cute twink ")
@patch("files.helpers.word_censor.SLURS", {
@ -131,15 +118,16 @@ def test_sub_matcher():
 })
 def test_censor_slurs():
    word_censor.REPLACE_MAP = create_replace_map()
    word_censor.SLUR_REGEX = create_slur_regex()
    assert_that(censor_slurs("<p>retard</p>", None)).is_equal_to("<p>r-slur</p>")
    assert_that(censor_slurs("<p>preretard</p>", None)).is_equal_to("<p>prer-slur</p>")
    assert_that(censor_slurs("that is Retarded like", None)).is_equal_to("that is R-slured like")
    assert_that(censor_slurs("that is SUPERRETARD like", None)).is_equal_to("that is SUPERR-SLUR like")
    assert_that(censor_slurs('... ReTaRd ...', None)).is_equal_to('... r-slur ...')
    assert_that(censor_slurs("<p>Manlets get out!</p>", None)).is_equal_to("<p>Little kings get out!</p>")
    assert_that(censor_slurs('... "retard" ...', None)).is_equal_to('... "retard" ...')
    assert_that(censor_slurs('... ReTaRd ...', None)).is_equal_to('... ReTaRd ...')
    assert_that(censor_slurs('... xretardx ...', None)).is_equal_to('... xretardx ...')
    assert_that(censor_slurs("LLM is a manlet hehe", None)).is_equal_to("LLM is a little king hehe")
@ -155,11 +143,9 @@ def test_censor_slurs():
    assert_that(censor_slurs('... I hate Carp ...', None)).is_equal_to('... I love Carp ...')
    assert_that(censor_slurs('... I Hate Carp ...', None)).is_equal_to('... I Love Carp ...')
    assert_that(censor_slurs('... I HATE CARP ...', None)).is_equal_to('... I LOVE CARP ...')
-
+    assert_that(censor_slurs('... I Hate carp ...', None)).is_equal_to('... i love Carp ...')
-    # Not covered:
+    assert_that(censor_slurs('... i Hate Carp ...', None)).is_equal_to('... i love Carp ...')
-    assert_that(censor_slurs('... I Hate carp ...', None)).is_equal_to('... I Hate carp ...')
+    assert_that(censor_slurs('... i Hate carp ...', None)).is_equal_to('... i love Carp ...')
    assert_that(censor_slurs('... i Hate Carp ...', None)).is_equal_to('... i Hate Carp ...')
    assert_that(censor_slurs('... i Hate carp ...', None)).is_equal_to('... i Hate carp ...')
    assert_that(censor_slurs('... i hate a carp ...', None)).is_equal_to('... i hate a carp ...')