From a9b078007e301dc562c127be5bdb7da507d7da75 Mon Sep 17 00:00:00 2001 From: Aevann Date: Fri, 22 Sep 2023 20:01:30 +0300 Subject: [PATCH] refactor slurs and profanities and put them in their own file --- files/classes/comment.py | 5 +- files/classes/hats.py | 4 +- files/classes/mod_logs.py | 4 +- files/classes/post.py | 9 +- files/classes/reports.py | 6 +- files/classes/sub_logs.py | 4 +- files/classes/user.py | 1 - files/helpers/alerts.py | 3 +- files/helpers/config/const.py | 106 ---------------- files/helpers/regex.py | 91 -------------- files/helpers/sharpen.py | 1 - files/helpers/slurs_and_profanities.py | 167 +++++++++++++++++++++++++ files/routes/admin.py | 3 +- files/routes/awards.py | 3 +- files/routes/chat.py | 5 +- files/routes/settings.py | 3 +- 16 files changed, 195 insertions(+), 220 deletions(-) create mode 100644 files/helpers/slurs_and_profanities.py diff --git a/files/classes/comment.py b/files/classes/comment.py index c8cf762d7..8068b5db7 100644 --- a/files/classes/comment.py +++ b/files/classes/comment.py @@ -12,6 +12,7 @@ from sqlalchemy.sql.sqltypes import * from files.classes import Base from files.helpers.config.const import * +from files.helpers.slurs_and_profanities import * from files.helpers.lazy import lazy from files.helpers.regex import * from files.helpers.sorting_and_time import * @@ -368,7 +369,7 @@ class Comment(Base): if body: if not (self.parent_post and self.post.sub == 'chudrama'): - body = censor_slurs(body, v) + body = censor_slurs_profanities(body, v) body = normalize_urls_runtime(body, v) @@ -384,7 +385,7 @@ class Comment(Base): if not body: return "" if not (self.parent_post and self.post.sub == 'chudrama'): - body = censor_slurs(body, v) + body = censor_slurs_profanities(body, v) body = replace_train_html(body) return body diff --git a/files/classes/hats.py b/files/classes/hats.py index 804e11063..b0ddd5b6d 100644 --- a/files/classes/hats.py +++ b/files/classes/hats.py @@ -7,7 +7,7 @@ from flask import g from files.classes import Base from files.helpers.lazy import lazy -from files.helpers.regex import censor_slurs +from files.helpers.slurs_and_profanities import censor_slurs_profanities class HatDef(Base): __tablename__ = "hat_defs" @@ -37,7 +37,7 @@ class HatDef(Base): @lazy def censored_description(self, v): - return censor_slurs(self.description, v) + return censor_slurs_profanities(self.description, v) @property @lazy diff --git a/files/classes/mod_logs.py b/files/classes/mod_logs.py index b2fdeccc7..72bf32615 100644 --- a/files/classes/mod_logs.py +++ b/files/classes/mod_logs.py @@ -7,7 +7,7 @@ from sqlalchemy.sql.sqltypes import * from files.classes import Base from files.helpers.config.const import * from files.helpers.lazy import lazy -from files.helpers.regex import censor_slurs +from files.helpers.slurs_and_profanities import censor_slurs_profanities from files.helpers.sorting_and_time import make_age_string class ModAction(Base): @@ -63,7 +63,7 @@ class ModAction(Base): if self.target_user_id: return f'@{self.target_user.username}' elif self.target_post_id: - return censor_slurs(f'{self.target_post.title_html}', None) + return censor_slurs_profanities(f'{self.target_post.title_html}', None) elif self.target_comment_id: return f'comment' diff --git a/files/classes/post.py b/files/classes/post.py index 7ea6ad1c0..bc23555ee 100644 --- a/files/classes/post.py +++ b/files/classes/post.py @@ -9,6 +9,7 @@ from sqlalchemy.sql.sqltypes import * from files.classes import Base from files.helpers.config.const import * +from files.helpers.slurs_and_profanities import * from files.helpers.lazy import lazy from files.helpers.regex import * from files.helpers.sorting_and_time import make_age_string @@ -300,7 +301,7 @@ class Post(Base): body = add_options(self, body, v) if self.sub != 'chudrama': - body = censor_slurs(body, v) + body = censor_slurs_profanities(body, v) body = normalize_urls_runtime(body, v) @@ -315,7 +316,7 @@ class Post(Base): if not body: return "" if self.sub != 'chudrama': - body = censor_slurs(body, v) + body = censor_slurs_profanities(body, v) body = replace_train_html(body) body = normalize_urls_runtime(body, v) @@ -327,7 +328,7 @@ class Post(Base): title = self.title_html if self.sub != 'chudrama': - title = censor_slurs(title, v) + title = censor_slurs_profanities(title, v) return title @@ -336,7 +337,7 @@ class Post(Base): title = self.title if self.sub != 'chudrama': - title = censor_slurs(title, v) + title = censor_slurs_profanities(title, v) title = replace_train_html(title) return title diff --git a/files/classes/reports.py b/files/classes/reports.py index 3ee76145e..f05e391f2 100644 --- a/files/classes/reports.py +++ b/files/classes/reports.py @@ -6,7 +6,7 @@ from sqlalchemy.sql.sqltypes import * from files.classes import Base from files.helpers.lazy import lazy -from files.helpers.regex import censor_slurs +from files.helpers.slurs_and_profanities import censor_slurs_profanities class Report(Base): __tablename__ = "reports" @@ -27,7 +27,7 @@ class Report(Base): @lazy def realreason(self, v): - return censor_slurs(self.reason, v) + return censor_slurs_profanities(self.reason, v) #lazy hack to avoid having to rename the comment_id column and causing potential new bugs @property @@ -55,7 +55,7 @@ class CommentReport(Base): @lazy def realreason(self, v): - return censor_slurs(self.reason, v) + return censor_slurs_profanities(self.reason, v) #lazy hack to avoid having to rename the comment_id column and causing potential new bugs @property diff --git a/files/classes/sub_logs.py b/files/classes/sub_logs.py index 99ca14f08..9cca9b9e0 100644 --- a/files/classes/sub_logs.py +++ b/files/classes/sub_logs.py @@ -7,7 +7,7 @@ from sqlalchemy.sql.sqltypes import * from files.classes import Base from files.helpers.config.const import * from files.helpers.lazy import lazy -from files.helpers.regex import censor_slurs +from files.helpers.slurs_and_profanities import censor_slurs_profanities from files.helpers.sorting_and_time import make_age_string class SubAction(Base): @@ -52,7 +52,7 @@ class SubAction(Base): if self.target_user_id: return f'@{self.target_user.username}' elif self.target_post_id: - return censor_slurs(f'{self.target_post.title_html}', None) + return censor_slurs_profanities(f'{self.target_post.title_html}', None) elif self.target_comment_id: return f'comment' diff --git a/files/classes/user.py b/files/classes/user.py index 565e3a377..33ba4e416 100644 --- a/files/classes/user.py +++ b/files/classes/user.py @@ -1,6 +1,5 @@ import random from operator import * -import re import pyotp from sqlalchemy import Column, ForeignKey, FetchedValue diff --git a/files/helpers/alerts.py b/files/helpers/alerts.py index 5378bdfc3..cd343360f 100644 --- a/files/helpers/alerts.py +++ b/files/helpers/alerts.py @@ -13,6 +13,7 @@ from files.classes import Comment, Notification, PushSubscription, Group from .config.const import * from .regex import * from .sanitize import * +from .slurs_and_profanities import censor_slurs_profanities def create_comment(text_html): new_comment = Comment(author_id=AUTOJANNY_ID, @@ -237,7 +238,7 @@ def push_notif(uids, title, body, url_or_comment): if len(body) > PUSH_NOTIF_LIMIT: body = body[:PUSH_NOTIF_LIMIT] + "..." - body = censor_slurs(body, None) + body = censor_slurs_profanities(body, None) subscriptions = g.db.query(PushSubscription.subscription_json).filter(PushSubscription.user_id.in_(uids)).all() subscriptions = [x[0] for x in subscriptions] diff --git a/files/helpers/config/const.py b/files/helpers/config/const.py index c4c3f8274..94606a5f9 100644 --- a/files/helpers/config/const.py +++ b/files/helpers/config/const.py @@ -117,40 +117,6 @@ GIRL_PHRASES = [ "$ PERIODT" ] -tranny = f':marseytrain:' -trannie = f':!marseytrain:' -troon = f':marseytrain2:' - -def replace_train_html(body): - body = body.replace(tranny, ':marseytrain:') - body = body.replace(trannie, ':!marseytrain:') - body = body.replace(troon, ':marseytrain2:') - return body - -SLURS = { - "tranny": tranny, - "trannie": trannie, - "troon": troon, - "(?(.+?)<\/a>', flags= email_regex = re.compile('[A-Za-z0-9._%+-]{1,64}@[A-Za-z0-9.-]{2,63}\.[A-Za-z]{2,63}', flags=re.A) -slur_single_words = "|".join([slur.lower() for slur in SLURS.keys()]) -slur_single_words_title = slur_single_words.title().replace('\W','\w') -slur_single_words_upper = slur_single_words.upper().replace('\W','\w') - -profanity_single_words = "|".join([profanity.lower() for profanity in PROFANITIES.keys()]) -profanity_single_words_title = profanity_single_words.title().replace('\W','\w') -profanity_single_words_upper = profanity_single_words.upper().replace('\W','\w') - -slur_regex = re.compile(f"<[^>]*>|{slur_single_words}", flags=re.I|re.A) -slur_regex_title = re.compile(f"<[^>]*>|{slur_single_words_title}", flags=re.A) -slur_regex_upper = re.compile(f"<[^>]*>|{slur_single_words_upper}", flags=re.A) - -profanity_regex = re.compile(f"<[^>]*>|{profanity_single_words}", flags=re.I|re.A) -profanity_regex_title = re.compile(f"<[^>]*>|{profanity_single_words_title}", flags=re.A) -profanity_regex_upper = re.compile(f"<[^>]*>|{profanity_single_words_upper}", flags=re.A) - torture_regex = re.compile('(^|\s)(i|me)($|\s)', flags=re.I|re.A) torture_regex2 = re.compile("(^|\s)(i'm)($|\s)", flags=re.I|re.A) torture_regex3 = re.compile("(^|\s)(my|mine)($|\s)", flags=re.I|re.A) @@ -158,81 +142,6 @@ pronouns_regex = re.compile("([a-z]{1,7})\/[a-z]{1,7}(\/[a-z]{1,7})?", flags=re. html_title_regex = re.compile("(.{1,200})", flags=re.I) - - -SLURS_FOR_REPLACING = {} -for k, val in SLURS.items(): - newkey = k.split('(?!')[0] - if ')' in newkey: - newkey = newkey.split(')')[1] - SLURS_FOR_REPLACING[newkey] = val - -PROFANITIES_FOR_REPLACING = {} -for k, val in PROFANITIES.items(): - newkey = k.split('(?!')[0] - if ')' in newkey: - newkey = newkey.split(')')[1] - PROFANITIES_FOR_REPLACING[newkey] = val - -def sub_matcher(match, upper=False, title=False, replace_with=SLURS_FOR_REPLACING): - group_num = 0 - match_str = match.group(group_num) - if match_str.startswith('<'): - return match_str - else: - repl = replace_with[match_str.lower()] - if (not upper and not title) or "' in body or '' in body: - return body - - if not logged_user or logged_user == 'chat' or logged_user.slurreplacer: - body = replace_re(body, slur_regex, slur_regex_title, slur_regex_upper, sub_matcher_slurs, sub_matcher_slurs_title, sub_matcher_slurs_upper) - - if SITE_NAME == 'rDrama': - if not logged_user or logged_user == 'chat' or logged_user.profanityreplacer: - body = replace_re(body, profanity_regex, profanity_regex_title, profanity_regex_upper, sub_matcher_profanities, sub_matcher_profanities_title, sub_matcher_profanities_upper) - - return body - commands = { "fortune": FORTUNE_REPLIES, "factcheck": FACTCHECK_REPLIES, diff --git a/files/helpers/sharpen.py b/files/helpers/sharpen.py index 6f52dfeb8..04903ec53 100644 --- a/files/helpers/sharpen.py +++ b/files/helpers/sharpen.py @@ -1,4 +1,3 @@ -import re from files.helpers.regex import * def sharpen(string): diff --git a/files/helpers/slurs_and_profanities.py b/files/helpers/slurs_and_profanities.py new file mode 100644 index 000000000..eb121f9da --- /dev/null +++ b/files/helpers/slurs_and_profanities.py @@ -0,0 +1,167 @@ +import re +from .config.const import * + +tranny = f':marseytrain:' +trannie = f':!marseytrain:' +troon = f':marseytrain2:' + +def replace_train_html(body): + body = body.replace(tranny, ':marseytrain:') + body = body.replace(trannie, ':!marseytrain:') + body = body.replace(troon, ':marseytrain2:') + return body + +SLURS = { + "tranny": tranny, + "trannie": trannie, + "troon": troon, + "(?]*>|{slur_single_words}", flags=re.I|re.A) +profanity_regex = re.compile(f"<[^>]*>|{profanity_single_words}", flags=re.I|re.A) + +SLURS_FOR_REPLACING = {} +for k, val in SLURS.items(): + newkey = k.split('(?!')[0] + if ')' in newkey: + newkey = newkey.split(')')[1] + SLURS_FOR_REPLACING[newkey] = val + SLURS_FOR_REPLACING[newkey.title()] = val.title() + SLURS_FOR_REPLACING[newkey.upper()] = val.upper() + +PROFANITIES_FOR_REPLACING = {} +for k, val in PROFANITIES.items(): + newkey = k.split('(?!')[0] + if ')' in newkey: + newkey = newkey.split(')')[1] + PROFANITIES_FOR_REPLACING[newkey] = val + PROFANITIES_FOR_REPLACING[newkey.title()] = val.title() + PROFANITIES_FOR_REPLACING[newkey.upper()] = val.upper() + + + +def sub_matcher(match, X_FOR_REPLACING): + group_num = 0 + match_str = match.group(group_num) + if match_str.startswith('<'): + return match_str + else: + repl = X_FOR_REPLACING[match_str] + return repl + +def sub_matcher_slurs(match): + return sub_matcher(match, SLURS_FOR_REPLACING) + +def sub_matcher_profanities(match): + return sub_matcher(match, PROFANITIES_FOR_REPLACING) + + + +def censor_slurs_profanities(body, logged_user): + if not body: return "" + + if '
' in body or '' in body:
+			return body
+
+	if not logged_user or logged_user == 'chat' or logged_user.slurreplacer:
+		body = slur_regex.sub(sub_matcher_slurs, body)
+
+	if SITE_NAME == 'rDrama':
+		if not logged_user or logged_user == 'chat' or logged_user.profanityreplacer:
+			body = profanity_regex.sub(sub_matcher_profanities, body)
+
+	return body
diff --git a/files/routes/admin.py b/files/routes/admin.py
index 8cb2e3a60..abeed62c9 100644
--- a/files/routes/admin.py
+++ b/files/routes/admin.py
@@ -11,6 +11,7 @@ from files.helpers.actions import *
 from files.helpers.alerts import *
 from files.helpers.cloudflare import *
 from files.helpers.config.const import *
+from files.helpers.slurs_and_profanities import censor_slurs_profanities
 from files.helpers.get import *
 from files.helpers.media import *
 from files.helpers.sanitize import *
@@ -909,7 +910,7 @@ def admin_title_change(user_id, v):
 
 	user.customtitleplain = new_name
 	new_name = filter_emojis_only(new_name)
-	new_name = censor_slurs(new_name, None)
+	new_name = censor_slurs_profanities(new_name, None)
 
 	user = get_account(user.id)
 	user.customtitle=new_name
diff --git a/files/routes/awards.py b/files/routes/awards.py
index 9ff7694dd..8e6500d07 100644
--- a/files/routes/awards.py
+++ b/files/routes/awards.py
@@ -9,6 +9,7 @@ from files.classes.userblock import UserBlock
 from files.helpers.actions import *
 from files.helpers.alerts import *
 from files.helpers.config.const import *
+from files.helpers.slurs_and_profanities import censor_slurs_profanities
 from files.helpers.config.awards import AWARDS_ENABLED, HOUSE_AWARDS, LOOTBOX_ITEM_COUNT, LOOTBOX_CONTENTS
 from files.helpers.get import *
 from files.helpers.marsify import marsify
@@ -394,7 +395,7 @@ def award_thing(v, thing_type, id):
 		else:
 			author.customtitleplain = new_name
 			new_name = filter_emojis_only(new_name)
-			new_name = censor_slurs(new_name, None)
+			new_name = censor_slurs_profanities(new_name, None)
 			if len(new_name) > 1000: abort(403)
 			author.customtitle = new_name
 			author.flairchanged = int(time.time()) + 86400
diff --git a/files/routes/chat.py b/files/routes/chat.py
index 5ab422259..d76466069 100644
--- a/files/routes/chat.py
+++ b/files/routes/chat.py
@@ -9,6 +9,7 @@ from flask import request
 from files.helpers.actions import *
 from files.helpers.alerts import *
 from files.helpers.config.const import *
+from files.helpers.slurs_and_profanities import censor_slurs_profanities
 from files.helpers.regex import *
 from files.helpers.media import *
 from files.helpers.sanitize import *
@@ -167,9 +168,9 @@ def speak(data, v):
 		"namecolor": v.name_color,
 		"patron": v.patron,
 		"text": text,
-		"text_censored": censor_slurs(text, 'chat'),
+		"text_censored": censor_slurs_profanities(text, 'chat'),
 		"text_html": text_html,
-		"text_html_censored": censor_slurs(text_html, 'chat'),
+		"text_html_censored": censor_slurs_profanities(text_html, 'chat'),
 		"time": int(time.time()),
 	}
 
diff --git a/files/routes/settings.py b/files/routes/settings.py
index fcd27ba7d..c0140aca9 100644
--- a/files/routes/settings.py
+++ b/files/routes/settings.py
@@ -12,6 +12,7 @@ from sqlalchemy.orm import load_only
 from files.helpers.actions import *
 from files.helpers.alerts import *
 from files.helpers.config.const import *
+from files.helpers.slurs_and_profanities import censor_slurs_profanities
 from files.helpers.get import *
 from files.helpers.mail import *
 from files.helpers.media import *
@@ -908,7 +909,7 @@ def settings_title_change(v):
 
 	if customtitleplain:
 		customtitle = filter_emojis_only(customtitleplain)
-		customtitle = censor_slurs(customtitle, None)
+		customtitle = censor_slurs_profanities(customtitle, None)
 
 		if len(customtitle) > 1000:
 			abort(400, "Flair too long!")