diff --git a/files/helpers/config/const.py b/files/helpers/config/const.py index ac71fcda2f..ab287c582c 100644 --- a/files/helpers/config/const.py +++ b/files/helpers/config/const.py @@ -96,6 +96,10 @@ AJ_REPLACEMENTS = { ' YOUR ': " YOU'RE ", ' TO ': " TOO ", + ' am ': ' is ', + ' Am ': ' Is ', + ' AM ': ' IS ', + 'everyone': 'everypony', 'everybody': 'everypony', diff --git a/files/helpers/regex.py b/files/helpers/regex.py index 0f03c45e28..a1b2ec9921 100644 --- a/files/helpers/regex.py +++ b/files/helpers/regex.py @@ -67,10 +67,9 @@ slur_regex_upper = re.compile(f"<[^>]*>|{slur_single_words.upper()}", flags=re.A profanity_regex = re.compile(f"<[^>]*>|{profanity_single_words}", flags=re.I|re.A) profanity_regex_upper = re.compile(f"<[^>]*>|{profanity_single_words.upper()}", flags=re.A) -torture_regex = re.compile('(^|\s)(i|me) ', flags=re.I|re.A) -torture_regex2 = re.compile("(^|\s)i'm ", flags=re.I|re.A) -torture_regex_exclude = re.compile('^\s*(>|`|
|| )', flags=re.A) - +torture_regex = re.compile('(^|\s)(i|me)($|\s)', flags=re.I|re.A) +torture_regex2 = re.compile("(^|\s)(i'm)($|\s)", flags=re.I|re.A) +torture_regex3 = re.compile("(^|\s)(my|mine)($|\s)", flags=re.I|re.A) image_check_regex = re.compile(f'!\[\]\(((?!(https:\/\/([a-z0-9-]+\.)*({hosts})\/|\/)).*?)\)', flags=re.A) @@ -160,20 +159,6 @@ def censor_slurs(body:Optional[str], logged_user): return body -def torture_ap(body, username): - lines = body.splitlines(keepends=True) - - for i in range(len(lines)): - if torture_regex_exclude.match(lines[i]): - continue - for k, l in AJ_REPLACEMENTS.items(): - lines[i] = lines[i].replace(k, l) - lines[i] = torture_regex.sub(rf'\1{username} ', lines[i]) - lines[i] = torture_regex2.sub(rf'\1{username} is ', lines[i]) - - return ''.join(lines).strip() - - commands = { "fortune": FORTUNE_REPLIES, "factcheck": FACTCHECK_REPLIES, diff --git a/files/helpers/sanitize.py b/files/helpers/sanitize.py index e0f6b0c395..e411d58752 100644 --- a/files/helpers/sanitize.py +++ b/files/helpers/sanitize.py @@ -611,14 +611,21 @@ def validate_css(css): return True, "" - -phrase_tags = {'p','h1','h2','h3','h4','h5','h6'} + + +def torture_ap(string, username): + if not string: return string + for k, l in AJ_REPLACEMENTS.items(): + string = string.replace(k, l) + string = torture_regex.sub(rf'\1{username}\3', string) + string = torture_regex2.sub(rf'\1{username} is\3', string) + string = torture_regex3.sub(rf"\1{username}'s\3", string) + return string + def complies_with_chud(obj): + #check for cases where u should leave if not obj.author.agendaposter: return True if obj.author.marseyawarded: return True - - old_body_html = obj.body_html.lower() - if isinstance(obj, Submission): if obj.id in ADMIGGER_THREADS: return True if obj.sub == "chudrama": return True @@ -626,22 +633,27 @@ def complies_with_chud(obj): if obj.parent_submission in ADMIGGER_THREADS: return True if obj.post.sub == "chudrama": return True - obj.body_html = torture_ap(obj.body_html, obj.author.username) + #get body_html's soup + soup = BeautifulSoup(obj.body_html, 'lxml') + + #torture body_html + tags = soup.html.body.find_all(lambda tag: tag.name not in {'blockquote','codeblock','pre'}, recursive=False) + for tag in tags: + tag.string.replace_with(torture_ap(tag.text, obj.author.username)) + obj.body_html = str(soup) + + #torture title_html and check for agendaposter_phrase in plain title and leave if it's there if isinstance(obj, Submission): obj.title_html = torture_ap(obj.title_html, obj.author.username) - if obj.author.agendaposter_phrase in obj.title: return True - - soup=BeautifulSoup(old_body_html, 'lxml') - - if not soup.html: - print(f'{STARS}{old_body_html}{STARS}', flush=True) - return False + if obj.author.agendaposter_phrase in obj.title.lower(): + return True + #check for agendaposter_phrase in body_html + phrase_tags = {'p','h1','h2','h3','h4','h5','h6'} tags = soup.html.body.find_all(lambda tag: tag.name in phrase_tags and not tag.attrs, recursive=False) - for tag in tags: for text in tag.find_all(text=True, recursive=False): - if obj.author.agendaposter_phrase in text: + if obj.author.agendaposter_phrase in text.lower(): return True return False