# MarseyWorld/files/helpers/slurs_and_profanities.py

import re
from .config.const import *

# Pre-rendered emoji <img> tags used as replacement values in SLURS below.
# censor_slurs_profanities() searches for these exact strings to turn them back
# into their :shortcode: form when is_plain is set, so the markup here and the
# markup inserted by the filter must stay byte-identical.
# `tranny` and `trannie` point at the same image and differ only in the
# alt/title shortcode (":marseytrain:" vs ":!marseytrain:").
tranny = f'<img loading="lazy" data-bs-toggle="tooltip" alt=":marseytrain:" title=":marseytrain:" src="{SITE_FULL_IMAGES}/e/marseytrain.webp">'
trannie = f'<img loading="lazy" data-bs-toggle="tooltip" alt=":!marseytrain:" title=":!marseytrain:" src="{SITE_FULL_IMAGES}/e/marseytrain.webp">'
troon = f'<img loading="lazy" data-bs-toggle="tooltip" alt=":marseytrain2:" title=":marseytrain2:" src="{SITE_FULL_IMAGES}/e/marseytrain2.webp">'

#DON'T ADD ANY FILTERS WITH ' OR ", VECTOR FOR HTML INJECTION
# NOTE(review): a few keys below already contain an apostrophe, so the warning
# above presumably concerns the replacement values - confirm before relying on it.
#
# Maps a regex pattern (key) to its replacement (value).
# - Keys are joined into one big alternation, and alternation is tried left to
#   right, so a longer variant must be listed before its shorter prefix
#   (e.g. "faggot" before "fag").
# - Keys may only be a lowercase literal optionally wrapped in a leading
#   lookbehind and/or trailing negative lookahead: the lookup key used at
#   replacement time is derived by stripping exactly those wrappers.
# - Keys containing regex escapes are raw strings so that sequences such as
#   `\w` and `\b` are not (invalid) Python string escapes.
SLURS = {
	"tranny": tranny,
	"trannie": trannie,
	"troon": troon,
	r"(?<!\bs)nigger": "BIPOC",
	"negroid": "BIPOC",
	"nignog": "BIPOC",
	"nig nog": "BIPOC",
	"niglet": 'BIPOClet',
	"negress": "BIPOCette",
	"faggot": "cute twink",
	"fag": "strag",
	r"(?<!\w)spic(?!\w)": "hard-working American",
	r"(?<!\w)spics(?!\w)": "hard-working Americans",
	"kike": "jewish chad",
	r"(?<!\w)heeb": "jewish chad",
	"daisy's destruction": "Cars 2",
	"daisys destruction": "Cars 2",
	"daisy destruction": "Cars 2",
	"pajeet": "sexy Indian dude",
	r"dyke(?!\w)": "cute butch",
	"dykes": "cute butches",
}

#DON'T ADD ANY FILTERS WITH ' OR ", VECTOR FOR HTML INJECTION
# Extra filters that are only merged into SLURS when the site is rDrama.
# The same rules as for the base table apply: longer variants go before their
# shorter prefixes ("retarded" and " a retard" before "retard"), and keys that
# contain regex escapes are raw strings so `\w` / `\(` are not treated as
# (invalid) Python string escapes.
if SITE_NAME == 'rDrama':
	SLURS |= {
		"retarded": "r-slurred",
		" a retard": " an r-slur",
		"retard": "r-slur",
		r"(?<!\w)pedo(?!\w)": "p-do",
		"pedophile": "p-dophile",
		"kill youself": "keep yourself safe",
		"kill yourself": "keep yourself safe",
		"kill yourselves": "keep yourselves safe",
		r"(?<!\w)kys(?!\w)": "keep yourself safe",
		"latinos": "latinx",
		"latino": "latinx",
		"latinas": "latinx",
		"latina": "latinx",
		"hispanics": "latinx",
		"hispanic": "latinx",
		"autistic": "neurodivergent",
		"gamer": "g*mer",
		"journalist": "journ*list",
		"journalism": "journ*lism",
		"fake and gay": "fake and straight",
		r"(?<!\w)rapist": "male feminist",
		"it's almost if": "Im an r-slur but",
		"it's almost as if": "Im an r-slur but",
		"it's almost like": "Im an r-slur but",
		"its almost if": "Im an r-slur but",
		"its almost as if": "Im an r-slur but",
		"its almost like": "Im an r-slur but",
		# The lookahead stops the filter from re-matching text that already
		# carries the " (" suffix this replacement adds.
		r"krayon(?! \()": "krayon (sister toucher)",
		"discord": "groomercord",
		'nigga': 'neighbor',
	}

# Maps a profanity regex pattern (key) to its replacement (value).
# Keys follow the same shape as the slur table: a lowercase literal optionally
# wrapped in a leading lookbehind and/or trailing negative lookahead. Keys with
# regex escapes are raw strings so `\w` is not an (invalid) Python string escape.
# Order matters because the keys are joined into a left-to-right alternation:
# 'motherfucker' must come before 'fuck'.
PROFANITIES = {
	'motherfucker': 'motherlover',
	'fuck': 'frick',
	r'(?<!\w)ass(?!\w)': 'butt',
	'shitting': 'pooping',
	'damn': 'darn',
	r'bitch(?!\w)': 'b-word',
	'toilet': 'potty',
	r'(?<!\w)asshole': 'butthole',
	r'(?<!\w)rape': 'r*pe',
	r'(?<!\w)hell(?!\w)': 'heck',
	r'(?<!\w)sex(?!\w)': 's*x',
	r'(?<!\w)cum(?!\w)': 'c*m',
	r'(?<!\w)dick': 'peepee',
	r'cock(?!\w)': 'peepee',
	'cocks': 'peepees',
	'penis': 'peepee',
	'pussy': 'kitty',
	'pussies': 'kitties',
	'cunt': 'c*nt',
}


# One alternation per table: every pattern key, lowercased, joined with "|".
# Dict insertion order is preserved, which is what makes the ordering of the
# tables significant.
slur_single_words = "|".join(map(str.lower, SLURS))
profanity_single_words = "|".join(map(str.lower, PROFANITIES))

# `<[^>]*>` is the first alternative so that a whole HTML tag is consumed as a
# single match; sub_matcher() returns such matches unchanged, which keeps tag
# names and attribute values out of reach of the word filters.
# Matching is case-insensitive and ASCII-only.
slur_regex = re.compile(f"<[^>]*>|{slur_single_words}", flags=re.IGNORECASE | re.ASCII)
profanity_regex = re.compile(f"<[^>]*>|{profanity_single_words}", flags=re.IGNORECASE | re.ASCII)

def _literal_key(pattern):
	"""Return the literal word of a filter pattern with its lookarounds stripped.

	Everything from the first negative lookahead `(?!` onwards is dropped, and
	if a `)` remains (i.e. the pattern starts with a lookbehind group) the text
	after that first `)` is kept. For example `(?<!\\w)spic(?!\\w)` becomes
	`spic`. The result is the text a match will actually contain (lowercased),
	which is what sub_matcher() looks up.
	"""
	literal = pattern.split('(?!')[0]
	if ')' in literal:
		literal = literal.split(')')[1]
	return literal


# Lookup tables keyed by the plain matched word instead of the regex pattern.
# Built with comprehensions so no loop variables are left behind in the module
# namespace (they would otherwise be re-exported by star-imports).
SLURS_FOR_REPLACING = {_literal_key(pattern): repl for pattern, repl in SLURS.items()}

PROFANITIES_FOR_REPLACING = {_literal_key(pattern): repl for pattern, repl in PROFANITIES.items()}


def sub_matcher(match, X_FOR_REPLACING):
	"""Return the replacement text for one regex match.

	match: the match object handed over by `re.sub`.
	X_FOR_REPLACING: dict mapping the lowercased matched word to its replacement.

	A match that starts with '<' is returned unchanged. Otherwise the
	replacement is looked up by the lowercased match (raises KeyError if it is
	missing) and, unless it contains an `<img` tag, its casing follows the
	match: an all-uppercase match gives an uppercased replacement, a match with
	an uppercase first character gives a replacement with its first character
	uppercased.
	"""
	matched = match.group(0)

	# Anything starting with '<' is passed through untouched.
	if matched.startswith('<'):
		return matched

	replacement = X_FOR_REPLACING[matched.lower()]

	# Image replacements are markup, so their case is never altered.
	if "<img" in replacement:
		return replacement

	if matched.isupper():
		return replacement.upper()

	if matched[0].isupper():
		# Only the first character is touched so the rest of the replacement
		# keeps its own capitalisation.
		return replacement[0].upper() + replacement[1:]

	return replacement

def sub_matcher_slurs(match):
	"""`re.sub` callback for the slur filter: resolves `match` against SLURS_FOR_REPLACING."""
	return sub_matcher(match, X_FOR_REPLACING=SLURS_FOR_REPLACING)

def sub_matcher_profanities(match):
	"""`re.sub` callback for the profanity filter: resolves `match` against PROFANITIES_FOR_REPLACING."""
	return sub_matcher(match, X_FOR_REPLACING=PROFANITIES_FOR_REPLACING)


def censor_slurs_profanities(body, logged_user, is_plain=False):
	"""Apply the slur filter, and on rDrama the profanity filter, to `body`.

	body: text to filter; a falsy value yields "".
	logged_user: a falsy value, the string 'chat', or an object exposing
		`slurreplacer` / `profanityreplacer`. Each filter runs for a falsy
		user and for 'chat'; otherwise it runs only when the matching
		attribute is truthy.
	is_plain: when true, the emoji <img> tags are turned back into their
		:shortcode: text after filtering.

	Returns the filtered text. Text containing `<pre>` or `<code>` is returned
	completely unfiltered.
	"""
	if not body:
		return ""

	# Bodies containing code blocks are skipped entirely rather than filtered around.
	if '<pre>' in body or '<code>' in body:
		return body

	# Computed once; short-circuiting below keeps attribute access off
	# falsy users and the 'chat' string.
	no_user_or_chat = not logged_user or logged_user == 'chat'

	if no_user_or_chat or logged_user.slurreplacer:
		body = slur_regex.sub(sub_matcher_slurs, body)

	if SITE_NAME == 'rDrama' and (no_user_or_chat or logged_user.profanityreplacer):
		body = profanity_regex.sub(sub_matcher_profanities, body)

	if is_plain:
		# Replace each emoji <img> tag with its shortcode.
		for img_tag, shortcode in (
			(tranny, ':marseytrain:'),
			(trannie, ':!marseytrain:'),
			(troon, ':marseytrain2:'),
		):
			body = body.replace(img_tag, shortcode)

	return body
refactor slurs and profanities and put them in their own file 2023-09-22 17:01:30 +00:00			`import re`
			`from .config.const import *`

			`tranny = f'<img loading="lazy" data-bs-toggle="tooltip" alt=":marseytrain:" title=":marseytrain:" src="{SITE_FULL_IMAGES}/e/marseytrain.webp">'`
			`trannie = f'<img loading="lazy" data-bs-toggle="tooltip" alt=":!marseytrain:" title=":!marseytrain:" src="{SITE_FULL_IMAGES}/e/marseytrain.webp">'`
			`troon = f'<img loading="lazy" data-bs-toggle="tooltip" alt=":marseytrain2:" title=":marseytrain2:" src="{SITE_FULL_IMAGES}/e/marseytrain2.webp">'`

add comments to remind myself 2024-02-12 07:18:43 +00:00			`#DON'T ADD ANY FILTERS WITH ' OR ", VECTOR FOR HTML INJECTION`
refactor slurs and profanities and put them in their own file 2023-09-22 17:01:30 +00:00			`SLURS = {`
			`"tranny": tranny,`
			`"trannie": trannie,`
			`"troon": troon,`
			`"(?<!\\bs)nigger": "BIPOC",`
			`"negroid": "BIPOC",`
			`"nignog": "BIPOC",`
			`"nig nog": "BIPOC",`
			`"niglet": 'BIPOClet',`
			`"negress": "BIPOCette",`
			`"faggot": "cute twink",`
			`"fag": "strag",`
			`"(?<!\w)spic(?!\w)": "hard-working American",`
			`"(?<!\w)spics(?!\w)": "hard-working Americans",`
			`"kike": "jewish chad",`
			`"(?<!\w)heeb": "jewish chad",`
			`"daisy's destruction": "Cars 2",`
			`"daisys destruction": "Cars 2",`
			`"daisy destruction": "Cars 2",`
			`"pajeet": "sexy Indian dude",`
fix this https://rdrama.net/post/210935/nhl-bans-pride-tape-on-hockey/5158420#context 2023-10-12 19:00:30 +00:00			`"dyke(?!\w)": "cute butch",`
			`"dykes": "cute butches",`
refactor slurs and profanities and put them in their own file 2023-09-22 17:01:30 +00:00			`}`

add comments to remind myself 2024-02-12 07:18:43 +00:00			`#DON'T ADD ANY FILTERS WITH ' OR ", VECTOR FOR HTML INJECTION`
refactor slurs and profanities and put them in their own file 2023-09-22 17:01:30 +00:00			`if SITE_NAME == 'rDrama':`
			`SLURS \|= {`
			`"retarded": "r-slurred",`
fix this https://rdrama.net/post/224827/he-was-a-20-year-old/5460520#context 2023-11-29 20:25:57 +00:00			`" a retard": " an r-slur",`
refactor slurs and profanities and put them in their own file 2023-09-22 17:01:30 +00:00			`"retard": "r-slur",`
delete old slur filters since they pile up and make reading shit difficult 2024-02-05 04:22:48 +00:00			`"(?<!\w)pedo(?!\w)": "p-do",`
refactor slurs and profanities and put them in their own file 2023-09-22 17:01:30 +00:00			`"pedophile": "p-dophile",`
			`"kill youself": "keep yourself safe",`
			`"kill yourself": "keep yourself safe",`
			`"kill yourselves": "keep yourselves safe",`
delete old slur filters since they pile up and make reading shit difficult 2024-02-05 04:22:48 +00:00			`"(?<!\w)kys(?!\w)": "keep yourself safe",`
refactor slurs and profanities and put them in their own file 2023-09-22 17:01:30 +00:00			`"latinos": "latinx",`
			`"latino": "latinx",`
			`"latinas": "latinx",`
			`"latina": "latinx",`
			`"hispanics": "latinx",`
			`"hispanic": "latinx",`
			`"autistic": "neurodivergent",`
			`"gamer": "g*mer",`
			`"journalist": "journ*list",`
			`"journalism": "journ*lism",`
			`"fake and gay": "fake and straight",`
			`"(?<!\w)rapist": "male feminist",`
remove ' 2024-02-12 05:34:44 +00:00			`"it's almost if": "Im an r-slur but",`
			`"it's almost as if": "Im an r-slur but",`
			`"it's almost like": "Im an r-slur but",`
			`"its almost if": "Im an r-slur but",`
			`"its almost as if": "Im an r-slur but",`
			`"its almost like": "Im an r-slur but",`
refactor slurs and profanities and put them in their own file 2023-09-22 17:01:30 +00:00			`"krayon(?! \()": "krayon (sister toucher)",`
			`"discord": "groomercord",`
			`'nigga': 'neighbor',`
			`}`

			`PROFANITIES = {`
			`'motherfucker': 'motherlover',`
			`'fuck': 'frick',`
			`'(?<!\w)ass(?!\w)': 'butt',`
			`'shitting': 'pooping',`
			`'damn': 'darn',`
			`'bitch(?!\w)': 'b-word',`
			`'toilet': 'potty',`
			`'(?<!\w)asshole': 'butthole',`
			`'(?<!\w)rape': 'r*pe',`
			`'(?<!\w)hell(?!\w)': 'heck',`
			`'(?<!\w)sex(?!\w)': 's*x',`
			`'(?<!\w)cum(?!\w)': 'c*m',`
			`'(?<!\w)dick': 'peepee',`
			`'cock(?!\w)': 'peepee',`
			`'cocks': 'peepees',`
			`'penis': 'peepee',`
			`'pussy': 'kitty',`
			`'pussies': 'kitties',`
			`'cunt': 'c*nt',`
			`}`



			`slur_single_words = "\|".join([slur.lower() for slur in SLURS.keys()])`
			`profanity_single_words = "\|".join([profanity.lower() for profanity in PROFANITIES.keys()])`
Revert "use NOT_IN_CODE_OR_LINKS" - causes TIMEOUTS This reverts commit 282f2c9588aad0537324d430d91435dc63f9435a. 2023-10-05 07:29:41 +00:00			`slur_regex = re.compile(f"<[^>]*>\|{slur_single_words}", flags=re.I\|re.A)`
			`profanity_regex = re.compile(f"<[^>]*>\|{profanity_single_words}", flags=re.I\|re.A)`
refactor slurs and profanities and put them in their own file 2023-09-22 17:01:30 +00:00
			`SLURS_FOR_REPLACING = {}`
			`for k, val in SLURS.items():`
			`newkey = k.split('(?!')[0]`
			`if ')' in newkey:`
			`newkey = newkey.split(')')[1]`
			`SLURS_FOR_REPLACING[newkey] = val`

			`PROFANITIES_FOR_REPLACING = {}`
			`for k, val in PROFANITIES.items():`
			`newkey = k.split('(?!')[0]`
			`if ')' in newkey:`
			`newkey = newkey.split(')')[1]`
			`PROFANITIES_FOR_REPLACING[newkey] = val`



			`def sub_matcher(match, X_FOR_REPLACING):`
			`group_num = 0`
			`match_str = match.group(group_num)`
			`if match_str.startswith('<'):`
			`return match_str`
			`else:`
fix 500 error when replacing "gAmer" 2023-09-22 17:06:13 +00:00			`repl = X_FOR_REPLACING[match_str.lower()]`
fix :marseytrain: slur filter 2023-09-24 22:32:31 +00:00			`if "<img" not in repl:`
			`if match_str.isupper():`
			`return repl.upper()`
minor slur filter improvement 2023-09-29 01:39:16 +00:00			`if match_str[0].isupper():`
fix "Gnu/Linux" 2023-09-29 21:27:55 +00:00			`return repl[0].upper() + repl[1:]`
refactor slurs and profanities and put them in their own file 2023-09-22 17:01:30 +00:00			`return repl`

			`def sub_matcher_slurs(match):`
			`return sub_matcher(match, SLURS_FOR_REPLACING)`

			`def sub_matcher_profanities(match):`
			`return sub_matcher(match, PROFANITIES_FOR_REPLACING)`



dont show <img loading="lazy" data-bs-toggle="tooltip" alt=":marseytrain2:" title=":marseytrain2:" src="https://i.rdrama.net/e/marseytrain2.webp"> when its not rendered 2023-09-26 15:37:07 +00:00			`def censor_slurs_profanities(body, logged_user, is_plain=False):`
refactor slurs and profanities and put them in their own file 2023-09-22 17:01:30 +00:00			`if not body: return ""`

Revert "use NOT_IN_CODE_OR_LINKS" - causes TIMEOUTS This reverts commit 282f2c9588aad0537324d430d91435dc63f9435a. 2023-10-05 07:29:41 +00:00			`if '<pre>' in body or '<code>' in body:`
			`return body`

refactor slurs and profanities and put them in their own file 2023-09-22 17:01:30 +00:00			`if not logged_user or logged_user == 'chat' or logged_user.slurreplacer:`
			`body = slur_regex.sub(sub_matcher_slurs, body)`

			`if SITE_NAME == 'rDrama':`
			`if not logged_user or logged_user == 'chat' or logged_user.profanityreplacer:`
			`body = profanity_regex.sub(sub_matcher_profanities, body)`

dont show <img loading="lazy" data-bs-toggle="tooltip" alt=":marseytrain2:" title=":marseytrain2:" src="https://i.rdrama.net/e/marseytrain2.webp"> when its not rendered 2023-09-26 15:37:07 +00:00			`if is_plain:`
			`body = body.replace(tranny, ':marseytrain:')`
			`body = body.replace(trannie, ':!marseytrain:')`
			`body = body.replace(troon, ':marseytrain2:')`

refactor slurs and profanities and put them in their own file 2023-09-22 17:01:30 +00:00			`return body`