diff --git a/files/helpers/config/const.py b/files/helpers/config/const.py index ac71fcda2f..ab287c582c 100644 --- a/files/helpers/config/const.py +++ b/files/helpers/config/const.py @@ -96,6 +96,10 @@ AJ_REPLACEMENTS = { ' YOUR ': " YOU'RE ", ' TO ': " TOO ", + ' am ': ' is ', + ' Am ': ' Is ', + ' AM ': ' IS ', + 'everyone': 'everypony', 'everybody': 'everypony', diff --git a/files/helpers/regex.py b/files/helpers/regex.py index 0f03c45e28..a1b2ec9921 100644 --- a/files/helpers/regex.py +++ b/files/helpers/regex.py @@ -67,10 +67,9 @@ slur_regex_upper = re.compile(f"<[^>]*>|{slur_single_words.upper()}", flags=re.A profanity_regex = re.compile(f"<[^>]*>|{profanity_single_words}", flags=re.I|re.A) profanity_regex_upper = re.compile(f"<[^>]*>|{profanity_single_words.upper()}", flags=re.A) -torture_regex = re.compile('(^|\s)(i|me) ', flags=re.I|re.A) -torture_regex2 = re.compile("(^|\s)i'm ", flags=re.I|re.A) -torture_regex_exclude = re.compile('^\s*(>|`|
||
)', flags=re.A)
-
+torture_regex = re.compile(r"(^|\s)(i|me)()(?=$|\s)", flags=re.I|re.A)  # lookahead leaves the trailing whitespace for the next match ("i me"); empty group 3 keeps `\3` valid
+torture_regex2 = re.compile(r"(^|\s)(i'm)()(?=$|\s)", flags=re.I|re.A)  # same shape: \1 = leading boundary, \2 = matched word, \3 = ''
+torture_regex3 = re.compile(r"(^|\s)(my|mine)()(?=$|\s)", flags=re.I|re.A)  # possessives; same group layout as the two patterns above
 
 image_check_regex = re.compile(f'!\[\]\(((?!(https:\/\/([a-z0-9-]+\.)*({hosts})\/|\/)).*?)\)', flags=re.A)
 
@@ -160,20 +159,6 @@ def censor_slurs(body:Optional[str], logged_user):
 
 	return body
 
-def torture_ap(body, username):
-	lines = body.splitlines(keepends=True)
-
-	for i in range(len(lines)):
-		if torture_regex_exclude.match(lines[i]):
-			continue
-		for k, l in AJ_REPLACEMENTS.items():
-			lines[i] = lines[i].replace(k, l)
-		lines[i] = torture_regex.sub(rf'\1{username} ', lines[i])
-		lines[i] = torture_regex2.sub(rf'\1{username} is ', lines[i])
-
-	return ''.join(lines).strip()
-
-
 commands = {
 	"fortune": FORTUNE_REPLIES,
 	"factcheck": FACTCHECK_REPLIES,
diff --git a/files/helpers/sanitize.py b/files/helpers/sanitize.py
index e0f6b0c395..e411d58752 100644
--- a/files/helpers/sanitize.py
+++ b/files/helpers/sanitize.py
@@ -611,14 +611,21 @@ def validate_css(css):
 
 	return True, ""
 
-	
-phrase_tags = {'p','h1','h2','h3','h4','h5','h6'}
+
+
+def torture_ap(string, username):
+	"""Rewrite first-person words in `string` (i/me, i'm, my/mine) as third-person references to `username`."""
+	if not string: return string  # None and '' are returned untouched
+	for k, l in AJ_REPLACEMENTS.items():  # literal phrase swaps first (e.g. ' am ' -> ' is ')
+		string = string.replace(k, l)
+	for regex, suffix in ((torture_regex, ''), (torture_regex2, ' is'), (torture_regex3, "'s")):
+		string = regex.sub(lambda m: f'{m[1]}{username}{suffix}{m[3]}', string)  # callable repl: a username like "2cool" must not turn \1 into \12
+	return string
+
 def complies_with_chud(obj):
+	#check for cases where u should leave
 	if not obj.author.agendaposter: return True
 	if obj.author.marseyawarded: return True
-
-	old_body_html = obj.body_html.lower()
-
 	if isinstance(obj, Submission):
 		if obj.id in ADMIGGER_THREADS: return True
 		if obj.sub == "chudrama": return True
@@ -626,22 +633,27 @@ def complies_with_chud(obj):
 		if obj.parent_submission in ADMIGGER_THREADS: return True
 		if obj.post.sub == "chudrama": return True
 
-	obj.body_html = torture_ap(obj.body_html, obj.author.username)
+	#get body_html's soup
+	soup = BeautifulSoup(obj.body_html, 'lxml')
+
+	#torture body_html
+	tags = soup.html.body.find_all(lambda tag: tag.name not in {'blockquote','codeblock','pre'}, recursive=False)
+	for tag in tags:
+		tag.string.replace_with(torture_ap(tag.text, obj.author.username))
+	obj.body_html = str(soup)
+
+	#torture title_html and check for agendaposter_phrase in plain title and leave if it's there
 	if isinstance(obj, Submission):
 		obj.title_html = torture_ap(obj.title_html, obj.author.username)
-		if obj.author.agendaposter_phrase in obj.title: return True
-
-	soup=BeautifulSoup(old_body_html, 'lxml')
-
-	if not soup.html:
-		print(f'{STARS}{old_body_html}{STARS}', flush=True)
-		return False
+		if obj.author.agendaposter_phrase in obj.title.lower():
+			return True
 
+	#check for agendaposter_phrase in body_html
+	phrase_tags = {'p','h1','h2','h3','h4','h5','h6'}
 	tags = soup.html.body.find_all(lambda tag: tag.name in phrase_tags and not tag.attrs, recursive=False)
-
 	for tag in tags:
 		for text in tag.find_all(text=True, recursive=False):
-			if obj.author.agendaposter_phrase in text:
+			if obj.author.agendaposter_phrase in text.lower():
 				return True
 
 	return False