From 9593dc58ddd90174fd4272e3faad7a7c955032cb Mon Sep 17 00:00:00 2001 From: Aevann1 Date: Fri, 24 Jun 2022 16:30:59 +0200 Subject: [PATCH] move regex from helpers.const into its own helper file --- files/classes/comment.py | 1 + files/classes/submission.py | 1 + files/helpers/alerts.py | 1 + files/helpers/const.py | 123 +----------------------------------- files/helpers/regex.py | 123 ++++++++++++++++++++++++++++++++++++ files/routes/chat.py | 1 + files/routes/comments.py | 1 + files/routes/login.py | 1 + files/routes/posts.py | 1 + files/routes/search.py | 1 + files/routes/settings.py | 1 + files/routes/subs.py | 1 + 12 files changed, 134 insertions(+), 122 deletions(-) create mode 100644 files/helpers/regex.py diff --git a/files/classes/comment.py b/files/classes/comment.py index d4db025a9..3a53a090c 100644 --- a/files/classes/comment.py +++ b/files/classes/comment.py @@ -8,6 +8,7 @@ from sqlalchemy.orm import relationship from files.__main__ import Base from files.classes.votes import CommentVote from files.helpers.const import * +from files.helpers.regex import * from files.helpers.lazy import lazy from .flags import CommentFlag from random import randint diff --git a/files/classes/submission.py b/files/classes/submission.py index 3d963c473..cdf21a7af 100644 --- a/files/classes/submission.py +++ b/files/classes/submission.py @@ -8,6 +8,7 @@ from sqlalchemy import * from sqlalchemy.orm import relationship, deferred from files.__main__ import Base from files.helpers.const import * +from files.helpers.regex import * from files.helpers.lazy import lazy from .flags import Flag from .comment import Comment, normalize_urls_runtime diff --git a/files/helpers/alerts.py b/files/helpers/alerts.py index bc0b3f709..1cf6af392 100644 --- a/files/helpers/alerts.py +++ b/files/helpers/alerts.py @@ -2,6 +2,7 @@ from files.classes import * from flask import g from .sanitize import * from .const import * +from .regex import * def create_comment(text_html, autojanny=False): if autojanny: author_id = AUTOJANNY_ID diff --git a/files/helpers/const.py b/files/helpers/const.py index 849341dfe..a82a96060 100644 --- a/files/helpers/const.py +++ b/files/helpers/const.py @@ -827,95 +827,6 @@ marseys_const = [x[0] for x in db.query(Marsey.name).filter(Marsey.name!='chudse marseys_const2 = marseys_const + ['chudsey','a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v','w','x','y','z','0','1','2','3','4','5','6','7','8','9','exclamationpoint','period','questionmark'] db.close() -if SITE_NAME == 'PCM': - valid_username_chars = 'a-zA-Z0-9_\-А-я' - valid_username_regex = re.compile("^[a-zA-Z0-9_\-А-я]{3,25}$", flags=re.A) - mention_regex = re.compile('(^|\s|

)@(([a-zA-Z0-9_\-А-я]){3,25})(?![^<]*<\/(code|pre|a)>)', flags=re.A) -else: - valid_username_chars = 'a-zA-Z0-9_\-' - valid_username_regex = re.compile("^[a-zA-Z0-9_\-]{3,25}$", flags=re.A) - mention_regex = re.compile('(^|\s|

)@(([a-zA-Z0-9_\-]){1,25})(?![^<]*<\/(code|pre|a)>)', flags=re.A) - -valid_password_regex = re.compile("^.{8,100}$", flags=re.A) - -marseyaward_body_regex = re.compile(">[^<\s+]|[^>\s+]<", flags=re.A) - -marseyaward_title_regex = re.compile("( *]+>)+", flags=re.A) - -marsey_regex = re.compile("[a-z0-9]{1,30}", flags=re.A) - -tags_regex = re.compile("[a-z0-9: ]{1,200}", flags=re.A) - -valid_sub_regex = re.compile("^[a-zA-Z0-9_\-]{3,20}$", flags=re.A) - -query_regex = re.compile("(\w+):(\S+)", flags=re.A) - -poll_regex = re.compile("\s*\$\$([^\$\n]+)\$\$\s*", flags=re.A) -bet_regex = re.compile("\s*\$\$\$([^\$\n]+)\$\$\$\s*", flags=re.A) -choice_regex = re.compile("\s*&&([^\$\n]+)&&\s*", flags=re.A) - -title_regex = re.compile("[^\w ]", flags=re.A) - -based_regex = re.compile("based and (.{1,20}?)(-| )pilled", flags=re.I|re.A) - -controversial_regex = re.compile('["> ](https:\/\/old\.reddit\.com/r/[a-zA-Z0-9_]{3,20}\/comments\/[\w\-.#&/=\?@%+]{5,250})["< ]', flags=re.A) - -fishylinks_regex = re.compile("https?://\S+", flags=re.A) - -spoiler_regex = re.compile('''\|\|(.+)\|\|''', flags=re.A) -reddit_regex = re.compile('(^|\s|

)\/?((r|u)\/(\w|-){3,25})(?![^<]*<\/(code|pre|a)>)', flags=re.A) -sub_regex = re.compile('(^|\s|

)\/?(h\/(\w|-){3,25})(?![^<]*<\/(code|pre|a)>)', flags=re.A) - -strikethrough_regex = re.compile('''~{1,2}([^~]+)~{1,2}''', flags=re.A) - -mute_regex = re.compile("/mute @([a-z0-9_\-]{3,25}) ([0-9])+", flags=re.A) - -emoji_regex = re.compile(f"

\s*(:[!#@]{{0,3}}[{valid_username_chars}]+:\s*)+<\/p>", flags=re.A) -emoji_regex2 = re.compile(f'(?(.+?)<\/a>', flags=re.A) - -email_regex = re.compile('[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,}', flags=re.A|re.I) - -utm_regex = re.compile('utm_[a-z]+=[a-z0-9_]+&', flags=re.A) -utm_regex2 = re.compile('[?&]utm_[a-z]+=[a-z0-9_]+', flags=re.A) - -slur_regex = re.compile(f"<[^>]*>|{single_words}", flags=re.I|re.A) -slur_regex_upper = re.compile(f"<[^>]*>|{single_words.upper()}", flags=re.A) -torture_regex = re.compile('(^|\s)(i|me) ', flags=re.I|re.A) -torture_regex2 = re.compile("(^|\s)i'm ", flags=re.I|re.A) -torture_regex_exclude = re.compile('^\s*>', flags=re.A) - -def sub_matcher(match, upper=False): - if match.group(0).startswith('<'): - return match.group(0) - else: - repl = SLURS[match.group(0).lower()] - return repl if not upper else repl.upper() - -def sub_matcher_upper(match): - return sub_matcher(match, upper=True) - -def censor_slurs(body, logged_user): - if not logged_user or logged_user == 'chat' or logged_user.slurreplacer: - body = slur_regex_upper.sub(sub_matcher_upper, body) - body = slur_regex.sub(sub_matcher, body) - return body - -def torture_ap(body, username): - lines = body.splitlines(keepends=True) - - for i in range(len(lines)): - if torture_regex_exclude.match(lines[i]): - continue - for k, l in AJ_REPLACEMENTS.items(): - lines[i] = lines[i].replace(k, l) - lines[i] = torture_regex.sub(rf'\1@{username} ', lines[i]) - lines[i] = torture_regex2.sub(rf'\1@{username} is ', lines[i]) - - return ''.join(lines) - YOUTUBE_KEY = environ.get("YOUTUBE_KEY", "").strip() ADMIGGERS = (37696,37697,37749,37833,37838,39413) @@ -995,36 +906,4 @@ def is_safe_url(url): return '\\' not in url and (url.startswith('/') or tldextract.extract(url).registered_domain in approved_embed_hosts) -hosts = "|".join(approved_embed_hosts).replace('.','\.') - -image_check_regex = re.compile(f'!\[\]\(((?!(https:\/\/([a-z0-9-]+\.)*({hosts})\/|\/)).*?)\)', flags=re.A) - -video_sub_regex = re.compile(f'(

[^<]*)(https:\/\/([a-z0-9-]+\.)*({hosts})\/[\w:~,()\-.#&\/=?@%;+]*?\.(mp4|webm|mov))', flags=re.A) -audio_sub_regex = re.compile(f'(

[^<]*)(https:\/\/([a-z0-9-]+\.)*({hosts})\/[\w:~,()\-.#&\/=?@%;+]*?\.(mp3|wav|ogg|aac|m4a|flac))', flags=re.A) - -imgur_regex = re.compile('(https:\/\/i\.imgur\.com\/[a-z0-9]+)\.(jpg|png|jpeg|webp)', flags=re.I|re.A) -giphy_regex = re.compile('(https:\/\/media\.giphy\.com\/media\/[a-z0-9]+\/giphy)\.gif', flags=re.I|re.A) - -youtube_regex = re.compile('(

[^<]*)(https:\/\/youtube\.com\/watch\?v\=([a-z0-9-_]{5,20})[\w\-.#&/=\?@%+]*)', flags=re.I|re.A) - -yt_id_regex = re.compile('[a-z0-9-_]{5,20}', flags=re.I|re.A) - -image_regex = re.compile("(^|\s)(https:\/\/[\w\-.#&/=\?@%;+,:]{5,250}(\.png|\.jpg|\.jpeg|\.gif|\.webp)(\?[\w\-.#&/=\?@%;+,:]*)?)($|\s)", flags=re.I|re.A) - -link_fix_regex = re.compile("(?!.*(http|\/))(.*\[[^\]]+\]\()([^)]+\))", flags=re.A) - -css_regex = re.compile('https?:\/\/[\w:~,()\-.#&\/=?@%;+]*', flags=re.I|re.A) - -procoins_li = (0,2500,5000,10000,25000,50000,125000,250000) - -linefeeds_regex = re.compile("([^\n])\n([^\n])", flags=re.A) - -greentext_regex = re.compile("(\n|^)>([^ >][^\n]*)", flags=re.A) - -ascii_only_regex = re.compile("[ -~]+", flags=re.A) - -twitter_to_nitter_regex = re.compile("https:\/\/twitter.com\/(\w{4,15}(\/status\/\d+[^/]*)?)", flags=re.A) - -reddit_domain_regex = re.compile("(^|\s)https:\/\/(reddit\.com|new\.reddit.com|www\.reddit.com|i\.reddit\.com|redd\.it|libredd\.it|teddit\.net)\/r\/", flags=re.A) - -def make_name(*args, **kwargs): return request.base_url +hosts = "|".join(approved_embed_hosts).replace('.','\.') \ No newline at end of file diff --git a/files/helpers/regex.py b/files/helpers/regex.py new file mode 100644 index 000000000..413e2b4f3 --- /dev/null +++ b/files/helpers/regex.py @@ -0,0 +1,123 @@ +import re +from files.helpers.const import * + +if SITE_NAME == 'PCM': + valid_username_chars = 'a-zA-Z0-9_\-А-я' + valid_username_regex = re.compile("^[a-zA-Z0-9_\-А-я]{3,25}$", flags=re.A) + mention_regex = re.compile('(^|\s|

)@(([a-zA-Z0-9_\-А-я]){3,25})(?![^<]*<\/(code|pre|a)>)', flags=re.A) +else: + valid_username_chars = 'a-zA-Z0-9_\-' + valid_username_regex = re.compile("^[a-zA-Z0-9_\-]{3,25}$", flags=re.A) + mention_regex = re.compile('(^|\s|

)@(([a-zA-Z0-9_\-]){1,25})(?![^<]*<\/(code|pre|a)>)', flags=re.A) + +valid_password_regex = re.compile("^.{8,100}$", flags=re.A) + +marseyaward_body_regex = re.compile(">[^<\s+]|[^>\s+]<", flags=re.A) + +marseyaward_title_regex = re.compile("( *]+>)+", flags=re.A) + +marsey_regex = re.compile("[a-z0-9]{1,30}", flags=re.A) + +tags_regex = re.compile("[a-z0-9: ]{1,200}", flags=re.A) + +valid_sub_regex = re.compile("^[a-zA-Z0-9_\-]{3,20}$", flags=re.A) + +query_regex = re.compile("(\w+):(\S+)", flags=re.A) + +poll_regex = re.compile("\s*\$\$([^\$\n]+)\$\$\s*", flags=re.A) +bet_regex = re.compile("\s*\$\$\$([^\$\n]+)\$\$\$\s*", flags=re.A) +choice_regex = re.compile("\s*&&([^\$\n]+)&&\s*", flags=re.A) + +title_regex = re.compile("[^\w ]", flags=re.A) + +based_regex = re.compile("based and (.{1,20}?)(-| )pilled", flags=re.I|re.A) + +controversial_regex = re.compile('["> ](https:\/\/old\.reddit\.com/r/[a-zA-Z0-9_]{3,20}\/comments\/[\w\-.#&/=\?@%+]{5,250})["< ]', flags=re.A) + +fishylinks_regex = re.compile("https?://\S+", flags=re.A) + +spoiler_regex = re.compile('''\|\|(.+)\|\|''', flags=re.A) +reddit_regex = re.compile('(^|\s|

)\/?((r|u)\/(\w|-){3,25})(?![^<]*<\/(code|pre|a)>)', flags=re.A) +sub_regex = re.compile('(^|\s|

)\/?(h\/(\w|-){3,25})(?![^<]*<\/(code|pre|a)>)', flags=re.A) + +strikethrough_regex = re.compile('''~{1,2}([^~]+)~{1,2}''', flags=re.A) + +mute_regex = re.compile("/mute @([a-z0-9_\-]{3,25}) ([0-9])+", flags=re.A) + +emoji_regex = re.compile(f"

\s*(:[!#@]{{0,3}}[{valid_username_chars}]+:\s*)+<\/p>", flags=re.A) +emoji_regex2 = re.compile(f'(?(.+?)<\/a>', flags=re.A) + +email_regex = re.compile('[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,}', flags=re.A|re.I) + +utm_regex = re.compile('utm_[a-z]+=[a-z0-9_]+&', flags=re.A) +utm_regex2 = re.compile('[?&]utm_[a-z]+=[a-z0-9_]+', flags=re.A) + +slur_regex = re.compile(f"<[^>]*>|{single_words}", flags=re.I|re.A) +slur_regex_upper = re.compile(f"<[^>]*>|{single_words.upper()}", flags=re.A) +torture_regex = re.compile('(^|\s)(i|me) ', flags=re.I|re.A) +torture_regex2 = re.compile("(^|\s)i'm ", flags=re.I|re.A) +torture_regex_exclude = re.compile('^\s*>', flags=re.A) + + +image_check_regex = re.compile(f'!\[\]\(((?!(https:\/\/([a-z0-9-]+\.)*({hosts})\/|\/)).*?)\)', flags=re.A) + +video_sub_regex = re.compile(f'(

[^<]*)(https:\/\/([a-z0-9-]+\.)*({hosts})\/[\w:~,()\-.#&\/=?@%;+]*?\.(mp4|webm|mov))', flags=re.A) +audio_sub_regex = re.compile(f'(

[^<]*)(https:\/\/([a-z0-9-]+\.)*({hosts})\/[\w:~,()\-.#&\/=?@%;+]*?\.(mp3|wav|ogg|aac|m4a|flac))', flags=re.A) + +imgur_regex = re.compile('(https:\/\/i\.imgur\.com\/[a-z0-9]+)\.(jpg|png|jpeg|webp)', flags=re.I|re.A) +giphy_regex = re.compile('(https:\/\/media\.giphy\.com\/media\/[a-z0-9]+\/giphy)\.gif', flags=re.I|re.A) + +youtube_regex = re.compile('(

[^<]*)(https:\/\/youtube\.com\/watch\?v\=([a-z0-9-_]{5,20})[\w\-.#&/=\?@%+]*)', flags=re.I|re.A) + +yt_id_regex = re.compile('[a-z0-9-_]{5,20}', flags=re.I|re.A) + +image_regex = re.compile("(^|\s)(https:\/\/[\w\-.#&/=\?@%;+,:]{5,250}(\.png|\.jpg|\.jpeg|\.gif|\.webp)(\?[\w\-.#&/=\?@%;+,:]*)?)($|\s)", flags=re.I|re.A) + +link_fix_regex = re.compile("(?!.*(http|\/))(.*\[[^\]]+\]\()([^)]+\))", flags=re.A) + +css_regex = re.compile('https?:\/\/[\w:~,()\-.#&\/=?@%;+]*', flags=re.I|re.A) + +procoins_li = (0,2500,5000,10000,25000,50000,125000,250000) + +linefeeds_regex = re.compile("([^\n])\n([^\n])", flags=re.A) + +greentext_regex = re.compile("(\n|^)>([^ >][^\n]*)", flags=re.A) + +ascii_only_regex = re.compile("[ -~]+", flags=re.A) + +twitter_to_nitter_regex = re.compile("https:\/\/twitter.com\/(\w{4,15}(\/status\/\d+[^/]*)?)", flags=re.A) + +reddit_domain_regex = re.compile("(^|\s)https:\/\/(reddit\.com|new\.reddit.com|www\.reddit.com|i\.reddit\.com|redd\.it|libredd\.it|teddit\.net)\/r\/", flags=re.A) + + +def sub_matcher(match, upper=False): + if match.group(0).startswith('<'): + return match.group(0) + else: + repl = SLURS[match.group(0).lower()] + return repl if not upper else repl.upper() + +def sub_matcher_upper(match): + return sub_matcher(match, upper=True) + +def censor_slurs(body, logged_user): + if not logged_user or logged_user == 'chat' or logged_user.slurreplacer: + body = slur_regex_upper.sub(sub_matcher_upper, body) + body = slur_regex.sub(sub_matcher, body) + return body + +def torture_ap(body, username): + lines = body.splitlines(keepends=True) + + for i in range(len(lines)): + if torture_regex_exclude.match(lines[i]): + continue + for k, l in AJ_REPLACEMENTS.items(): + lines[i] = lines[i].replace(k, l) + lines[i] = torture_regex.sub(rf'\1@{username} ', lines[i]) + lines[i] = torture_regex2.sub(rf'\1@{username} is ', lines[i]) + + return ''.join(lines) \ No newline at end of file diff --git a/files/routes/chat.py b/files/routes/chat.py index 1075d60cc..089732a19 100644 --- a/files/routes/chat.py +++ b/files/routes/chat.py @@ -2,6 +2,7 @@ import time from files.helpers.wrappers import auth_required from files.helpers.sanitize import sanitize from files.helpers.const import * +from files.helpers.regex import * from datetime import datetime from flask_socketio import SocketIO, emit from files.__main__ import app, limiter, cache diff --git a/files/routes/comments.py b/files/routes/comments.py index 19b74add8..15488c7cb 100644 --- a/files/routes/comments.py +++ b/files/routes/comments.py @@ -2,6 +2,7 @@ from files.helpers.wrappers import * from files.helpers.alerts import * from files.helpers.media import * from files.helpers.const import * +from files.helpers.regex import * from files.helpers.slots import * from files.helpers.blackjack import * from files.helpers.treasure import * diff --git a/files/routes/login.py b/files/routes/login.py index afa4f4b5a..80dab5695 100644 --- a/files/routes/login.py +++ b/files/routes/login.py @@ -2,6 +2,7 @@ from urllib.parse import urlencode from files.mail import * from files.__main__ import app, limiter from files.helpers.const import * +from files.helpers.regex import * from files.helpers.actions import * from files.helpers.get import * import requests diff --git a/files/routes/posts.py b/files/routes/posts.py index e414c35b4..65426ba46 100644 --- a/files/routes/posts.py +++ b/files/routes/posts.py @@ -6,6 +6,7 @@ from files.helpers.sanitize import * from files.helpers.alerts import * from files.helpers.discord import send_discord_message from files.helpers.const import * +from files.helpers.regex import * from files.helpers.slots import * from files.helpers.get import * from files.classes import * diff --git a/files/routes/search.py b/files/routes/search.py index 231f2e589..fa6a095b0 100644 --- a/files/routes/search.py +++ b/files/routes/search.py @@ -3,6 +3,7 @@ import re from sqlalchemy import * from flask import * from files.__main__ import app +from files.helpers.regex import * valid_params = [ 'author', diff --git a/files/routes/settings.py b/files/routes/settings.py index f30a18941..350e1d2be 100644 --- a/files/routes/settings.py +++ b/files/routes/settings.py @@ -3,6 +3,7 @@ from files.helpers.alerts import * from files.helpers.sanitize import * from files.helpers.discord import remove_user, set_nick from files.helpers.const import * +from files.helpers.regex import * from files.helpers.actions import * from files.helpers.get import * from files.mail import * diff --git a/files/routes/subs.py b/files/routes/subs.py index 90d8fb847..92d8ff5f0 100644 --- a/files/routes/subs.py +++ b/files/routes/subs.py @@ -2,6 +2,7 @@ from files.__main__ import app, limiter, mail from files.helpers.alerts import * from files.helpers.wrappers import * from files.helpers.get import * +from files.helpers.regex import * from files.classes import * from .front import frontlist import tldextract