Corrected specific Uppercase slur replacement

remotes/1693045480750635534/spooky-22
Yo Mama 2021-10-17 01:58:10 +02:00
parent ea21b9818f
commit 8dd79e5368
3 changed files with 122 additions and 28 deletions

View File

@ -13,11 +13,10 @@ site = environ.get("DOMAIN", '').strip()
# - "superretard"
# But not "superretarded"
#
# If all letters are lowercase then it will match lowercase, all variations of first letter of words up and all letters up
# If all letters are lowercase then it will match lowercase, first letter up in first or all the words and all letters up
# "dancing israelis" will match (with prefixes and suffixes omitted for brevity):
# - "dancing israelis"
# - "Dancing israelis"
# - "dancing Israelis"
# - "Dancing Israelis"
# - "DANCING ISRAELIS"
#
@ -39,7 +38,6 @@ site = environ.get("DOMAIN", '').strip()
# "kill yourself" -> "keep yourself safe"
# "Kill yourself" -> "Keep yourself safe"
# "Kill Yourself" -> "Keep Yourself Safe"
# "kill Yourself" -> "Keep yourself safe" (this one is harder to keep the capitalization, so it defaults to first word upper and the others lower)
# "KILL YOURSELF" -> "KEEP YOURSELF SAFE"
#
# If the replacement side has some capitalization, then that capitalization will always be maintained
@ -48,6 +46,8 @@ site = environ.get("DOMAIN", '').strip()
# "Pajeet" -> "Sexy Indian dude"
# "PAJEET" -> "SEXY INDIAN DUDE"
#
# There is a super special case that if the replacer starts with "http" then it never changes capitalization
#
#
# TL;DR: Just read the above once, or don't, and try to guess!
SLURS = {
@ -74,7 +74,7 @@ SLURS = {
"latina": "latinx",
"hispanics": "latinx",
"hispanic": "latinx",
"uss liberty incident": "tragic accident aboard the USS Liberty",
"USS liberty incident": "tragic accident aboard the USS Liberty",
"lavon affair": "Lavon Misunderstanding",
"shylock": "Israeli friend",
"yid": "Israeli friend",
@ -90,7 +90,7 @@ SLURS = {
"i hate marsey": "i love marsey",
"libertarian": "pedophile",
"billie bilish": "Billie Eilish (fat cow)",
"dancing israelis": "i love Israel",
"dancing Israelis": "i love Israel",
"sodomite": "total dreamboat",
"pajeet": "sexy Indian dude",
"female": "birthing person",
@ -99,7 +99,7 @@ SLURS = {
"renter": "rentoid",
"autistic": "neurodivergent",
"anime": "p-dophilic japanese cartoons",
"holohoax": "I tried to claim the Holocaust didn't happen because I am a pencil-dicked imbecile and the word filter caught me lol",
"holohoax": "i tried to claim the Holocaust didn't happen because I am a pencil-dicked imbecile and the word filter caught me lol",
"groomercord": "discord (actually a pretty cool service)",
"pedocord": "discord (actually a pretty cool service)",
"i hate Carp": "i love Carp",

View File

@ -5,13 +5,42 @@ from re import Match
from files.helpers.const import SLURS
def create_replace_map():
dicts = [{
slur.strip(): replacer,
slur.strip().title(): replacer.title(),
slur.strip().capitalize(): replacer.capitalize(),
slur.strip().upper(): replacer.upper(),
} for (slur, replacer) in SLURS.items()]
def first_upper(phrase: str) -> str:
    """Uppercase only the first character of *phrase*, leaving the rest untouched.

    Unlike str.capitalize(), characters after the first keep their original
    case ("uss Liberty" -> "Uss Liberty", not "Uss liberty").

    Returns the empty string unchanged instead of raising IndexError
    (callers may pass blank words, e.g. from a double space in a phrase).
    """
    if not phrase:
        return phrase
    return phrase[0].upper() + phrase[1:]
def first_all_upper(phrase: str) -> str:
    """Uppercase the first character of every space-separated word, not messing with the others."""
    # Splitting a phrase with no spaces yields a one-word list, so a single
    # expression covers both the one-word and multi-word cases.
    words = phrase.split(" ")
    return " ".join(word[0].upper() + word[1:] for word in words)
def get_permutations_slur(slur: str, replacer: str = "_") -> dict[str, str]:
    """
    Given a slur and a replacer, build every casing permutation of the slur and
    map each one to the correspondingly-cased replacer.
    """
    stripped = slur.strip()
    # special case for the :marseymerchant:'s link replacer — links must keep
    # their exact casing or they break
    keep_as_is = replacer.startswith("http")

    def cased(transform):
        # apply the casing transform unless the replacer is a link
        return replacer if keep_as_is else transform(replacer)

    # insertion order is deliberate: the 'Correctest' casing is written last,
    # so it wins when two permutations collide on the same key
    return {
        stripped.upper(): cased(str.upper),
        first_all_upper(stripped): cased(first_all_upper),
        stripped.lower(): replacer,
        stripped: replacer,
        first_upper(stripped): cased(first_upper),
    }
def create_replace_map() -> dict[str, str]:
    """Create the map that will be used to get the matching replacer for a given slur.

    Each slur contributes every casing permutation of itself; the per-slur
    dicts are then flattened into one lookup dict. ChainMap gives priority to
    earlier dicts on key collisions between different slurs.
    """
    # NOTE: the original annotation was dict[str: str] (a slice, not a type
    # parameterization); corrected to dict[str, str].
    dicts = [get_permutations_slur(slur, replacer) for (slur, replacer) in SLURS.items()]
    # flattens the list of dicts to a single dict
    return dict(ChainMap(*dicts))
@ -20,21 +49,17 @@ def create_replace_map():
# Precomputed slur -> cased-replacement lookup, built once at import time.
REPLACE_MAP = create_replace_map()
def create_variations_slur_regex(slur: str):
stripped = slur.strip()
variations = [stripped, stripped.upper(), stripped.capitalize()]
# capitalize multiple words if there are multiple words (just in case)
if " " in stripped:
variations.append(stripped.title())
def create_variations_slur_regex(slur: str) -> list[str]:
"""For a given match generates the corresponding replacer"""
permutations = get_permutations_slur(slur)
if slur.startswith(" ") and slur.endswith(" "):
return [rf"(\s|>)({var})(\s|<)" for var in variations]
return [rf"(\s|>)({perm})(\s|<)" for perm in permutations.keys()]
else:
return [rf"(\s|>)({var})|({var})(\s|<)" for var in variations]
return [rf"(\s|>)({perm})|({perm})(\s|<)" for perm in permutations.keys()]
def sub_matcher(match: Match):
def sub_matcher(match: Match) -> str:
# special case when it should match exact word
if len(match.groups()) is 3:
found = match.group(2)
@ -47,7 +72,7 @@ def sub_matcher(match: Match):
return (match.group(1) or '') + replacer + (match.group(4) or '')
def censor_slurs(body: str, logged_user):
def censor_slurs(body: str, logged_user) -> str:
if logged_user and not logged_user.slurreplacer:
return body

View File

@ -4,7 +4,45 @@ from unittest.mock import patch
from assertpy import assert_that
from files.helpers import word_censor
from files.helpers.word_censor import create_variations_slur_regex, create_replace_map, censor_slurs, sub_matcher
from files.helpers.word_censor import create_variations_slur_regex, create_replace_map, censor_slurs, sub_matcher, \
get_permutations_slur, first_upper, first_all_upper
def test_first_upper():
    """first_upper uppercases only the first character, leaving the rest alone."""
    cases = {
        "USS liberty": "USS liberty",
        "uss liberty": "Uss liberty",
        "uss Liberty": "Uss Liberty",
    }
    for given, expected in cases.items():
        assert_that(first_upper(given)).is_equal_to(expected)
def test_first_all_upper():
    """first_all_upper uppercases the first character of every word, leaving the rest alone."""
    cases = {
        "USS liberty": "USS Liberty",
        "uss liberty": "Uss Liberty",
        "uss Liberty": "Uss Liberty",
    }
    for given, expected in cases.items():
        assert_that(first_all_upper(given)).is_equal_to(expected)
def test_get_permutations_slur():
    """A multi-word slur yields every casing permutation mapped to a matching-cased replacer."""
    result = get_permutations_slur("USS liberty incident", "tragic accident aboard the USS Liberty")

    # dict equality is order-independent, so only the key/value pairs matter here
    assert_that(result).is_equal_to({
        "uss liberty incident": "tragic accident aboard the USS Liberty",
        "USS liberty incident": "Tragic accident aboard the USS Liberty",
        "USS Liberty Incident": "Tragic Accident Aboard The USS Liberty",
        "USS LIBERTY INCIDENT": "TRAGIC ACCIDENT ABOARD THE USS LIBERTY",
    })
def test_get_permutations_slur_wiht_link_replacer():
    """A replacer that is a link keeps its exact casing for every permutation of the slur."""
    # NOTE(review): "wiht" typo kept in the function name so test discovery/history stays stable
    link = "https://sciencedirect.com/science/article/abs/pii/S016028960600033X"

    result = get_permutations_slur("kike", link)

    assert_that(result).is_equal_to({
        "kike": link,
        "Kike": link,
        "KIKE": link,
    })
def test_create_variations_slur_regex_for_slur_with_spaces():
@ -29,8 +67,8 @@ def test_create_variations_slur_regex_single_word():
def test_create_variations_slur_regex_multiple_word():
expected = [r"(\s|>)(kill yourself)|(kill yourself)(\s|<)",
r"(\s|>)(Kill Yourself)|(Kill Yourself)(\s|<)",
r"(\s|>)(Kill yourself)|(Kill yourself)(\s|<)",
r"(\s|>)(Kill Yourself)|(Kill Yourself)(\s|<)",
r"(\s|>)(KILL YOURSELF)|(KILL YOURSELF)(\s|<)"]
result = create_variations_slur_regex("kill yourself")
@ -41,6 +79,7 @@ def test_create_variations_slur_regex_multiple_word():
"tranny": "πŸš‚πŸšƒπŸšƒ",
"kill yourself": "keep yourself safe",
"faggot": "cute twink",
"NoNewNormal": "NoNewNormal",
" nig ": "πŸ€",
})
def test_create_replace_map():
@ -50,15 +89,19 @@ def test_create_replace_map():
"TRANNY": "πŸš‚πŸšƒπŸšƒ",
"kill yourself": "keep yourself safe",
"Kill yourself": "Keep yourself safe",
"KILL YOURSELF": "KEEP YOURSELF SAFE",
"Kill Yourself": "Keep Yourself Safe",
"KILL YOURSELF": "KEEP YOURSELF SAFE",
"faggot": "cute twink",
"Faggot": "Cute twink",
"FAGGOT": "CUTE TWINK",
"NoNewNormal": "NoNewNormal",
"nonewnormal": "NoNewNormal",
"NONEWNORMAL": "NONEWNORMAL",
"nig": "πŸ€",
"Nig": "πŸ€",
"NIG": "πŸ€",
}
result = create_replace_map()
assert_that(result).is_equal_to(expected)
@ -79,7 +122,13 @@ def test_sub_matcher():
assert_that(sub_matcher(match)).is_equal_to(">πŸ€ ")
@patch("files.helpers.word_censor.SLURS", {'retard': 'r-slur', 'manlet': 'little king', ' nig ': 'πŸ€'})
@patch("files.helpers.word_censor.SLURS", {
'retard': 'r-slur',
'manlet': 'little king',
' nig ': 'πŸ€',
'i hate Carp': 'i love Carp',
'kike': 'https://sciencedirect.com/science/article/abs/pii/S016028960600033X'
})
def test_censor_slurs():
word_censor.REPLACE_MAP = create_replace_map()
@ -101,9 +150,29 @@ def test_censor_slurs():
assert_that(censor_slurs('<p>NIG</p>', None)).is_equal_to('<p>πŸ€</p>')
assert_that(censor_slurs('... nigeria ...', None)).is_equal_to('... nigeria ...')
assert_that(censor_slurs('... i hate Carp ...', None)).is_equal_to('... i love Carp ...')
assert_that(censor_slurs('... i hate carp ...', None)).is_equal_to('... i love Carp ...')
assert_that(censor_slurs('... I hate Carp ...', None)).is_equal_to('... I love Carp ...')
assert_that(censor_slurs('... I Hate Carp ...', None)).is_equal_to('... I Love Carp ...')
assert_that(censor_slurs('... I HATE CARP ...', None)).is_equal_to('... I LOVE CARP ...')
# Not covered:
assert_that(censor_slurs('... I Hate carp ...', None)).is_equal_to('... I Hate carp ...')
assert_that(censor_slurs('... i Hate Carp ...', None)).is_equal_to('... i Hate Carp ...')
assert_that(censor_slurs('... i Hate carp ...', None)).is_equal_to('... i Hate carp ...')
assert_that(censor_slurs('... i hate a carp ...', None)).is_equal_to('... i hate a carp ...')
assert_that(censor_slurs("<p>retarded SuperManlet NIG</p>", None)) \
.is_equal_to("<p>r-slured SuperLittle king πŸ€</p>")
assert_that(censor_slurs('... kike ...', None)) \
.is_equal_to('... https://sciencedirect.com/science/article/abs/pii/S016028960600033X ...')
assert_that(censor_slurs('... Kike ...', None)) \
.is_equal_to('... https://sciencedirect.com/science/article/abs/pii/S016028960600033X ...')
assert_that(censor_slurs('... KIKE ...', None)) \
.is_equal_to('... https://sciencedirect.com/science/article/abs/pii/S016028960600033X ...')
@patch("files.helpers.word_censor.SLURS", {'retard': 'r-slur', 'manlet': 'little king', ' nig ': 'πŸ€'})
def test_censor_slurs_does_not_error_out_on_exception():