import random import re from flask import g from files.classes.media import Media from .config.const import * NOT_IN_CODE_OR_LINKS = '(?!([^<]*<\/(code|pre|a)>|[^`\n]*`))' valid_username_regex = re.compile("^[\w-]{3,25}$", flags=re.A) valid_username_patron_regex = re.compile("^[\w-]{1,25}$", flags=re.A) mention_regex = re.compile('(?)!(everyone)' + NOT_IN_CODE_OR_LINKS, flags=re.A) valid_password_regex = re.compile("^.{8,100}$", flags=re.A) marseyaward_body_regex = re.compile(">[^<\s+]|[^>\s+]<", flags=re.A) marseyaward_title_regex = re.compile("( *]+>)+", flags=re.A) emoji_name_regex = re.compile("[a-z0-9]{1,30}", flags=re.A) tags_regex = re.compile("[a-z0-9: ]{1,200}", flags=re.A) hat_name_regex = re.compile("[\w\-() ,]{1,50}", flags=re.A) description_regex = re.compile("[^<>&\n\t]{1,300}", flags=re.A) badge_name_regex = re.compile(r"[^\/.]+", flags=re.A) hole_group_name_regex = re.compile("^[\w-]{3,25}$", flags=re.A) query_regex = re.compile("(\w+):(\S+)", flags=re.A) poll_regex = re.compile("(^|\n|>)\$\$([^\n]+?)\$\$\s*?" + NOT_IN_CODE_OR_LINKS, flags=re.A) bet_regex = re.compile("(^|\n|>)##([^\n]+?)##\s*?" + NOT_IN_CODE_OR_LINKS, flags=re.A) choice_regex = re.compile("(^|\n|>)&&([^\n]+?)&&\s*?" + NOT_IN_CODE_OR_LINKS, flags=re.A) html_comment_regex = re.compile("", flags=re.A) title_regex = re.compile("[^\w ]", flags=re.A) controversial_regex = re.compile('https:\/\/old\.reddit\.com\/r\/\w{2,20}\/comments\/[\w\-.#&/=\?@%+]{5,250}', flags=re.A) spoiler_regex = re.compile('\|\|(.+?)\|\|' + NOT_IN_CODE_OR_LINKS, flags=re.A) hole_mention_regex = re.compile('(?|")~{1,2}([^~]+)~{1,2}' + NOT_IN_CODE_OR_LINKS, flags=re.A) mute_regex = re.compile("\/mute @?([\w-]{1,30}) ([0-9]+)", flags=re.A|re.I) unmute_regex = re.compile("\/unmute @?([\w-]{1,30})", flags=re.A|re.I) emoji_regex = re.compile(f"

\s*(:[!#@\w\-]{{1,72}}:\s*)+<\/p>", flags=re.A) emoji_regex2 = re.compile(f'(?)', flags=re.A) snappy_url_regex = re.compile('(.+?)<\/a>', flags=re.A) email_regex = re.compile('[A-Za-z0-9._%+-]{1,64}@[A-Za-z0-9.-]{2,63}\.[A-Za-z]{2,63}', flags=re.A) torture_regex = re.compile('(^|\s)(i|me)($|\s)', flags=re.I|re.A) torture_regex2 = re.compile("(^|\s)(i'?m)($|\s)", flags=re.I|re.A) torture_regex3 = re.compile("(^|\s)(my|mine)($|\s)", flags=re.I|re.A) #matches ". ", does not match "..." or a.b sentence_ending_regex = re.compile('(?+)', flags=re.I|re.A) #matches "the" or is, but only if it is not followed by "fucking". https://regex101.com/r/yxuYsQ/2 the_fucking_regex = re.compile('(?<=^|(?<=\s))((?:the|a)( (?:only))?|((that )?(?:is|are|was|were|will be|would be)( (?:your|her|his|their|no|a|not|to|too|so|this|the|our|what))?( (a|the))?)|is)(?=\s)(?! fucking)' + NOT_IN_CODE_OR_LINKS, flags=re.I|re.A) #matches a single question mark but only if it isn't preceded by ", bitch" bitch_question_mark_regex = re.compile('(?

' return f'

' audio_regex_extensions = '|'.join(AUDIO_FORMATS) audio_sub_regex = re.compile(f'(?([^ >][^\n]*)", flags=re.A) allowed_domain_regex = re.compile("[a-z0-9\-.]+", flags=re.I|re.A) twitter_domain_regex = re.compile('(^|>|")https:\/\/twitter.com\/(?!i\/)', flags=re.A) instagram_to_imgsed_regex = re.compile('(^|>|")https:\/\/instagram.com\/(?!reel\/)', flags=re.A) color_regex = re.compile("[a-f0-9]{6}", flags=re.A) # lazy match on the .*?, only match if there is trailing stuff # Specifically match Snappy's way of formatting, this might break some losers' comments. showmore_regex = re.compile(r"^(.*?

(?:)?)(\s*

.*)", flags=re.A|re.DOTALL) search_token_regex = re.compile('"([^"]*)"|(\S+)', flags=re.A) git_regex = re.compile("ref: (refs/.+)", flags=re.A) pronouns_regex = re.compile("(\w{1,7})\/\w{1,7}(\/\w{1,7})?", flags=re.A) html_title_regex = re.compile("(.{1,200})", flags=re.I) excessive_css_scale_regex = re.compile("scale\([^)]*?(\d{2})", flags=re.A) word_alert_regex = re.compile(r'\b(764|o9a|9 angles|hurtcore|pthc)\b', flags=re.A|re.I) commands = { "fortune": FORTUNE_REPLIES, "factcheck": FACTCHECK_REPLIES, "8ball": EIGHTBALL_REPLIES, "coinflip": COINFLIP_HEADS_OR_TAILS, } command_regex = re.compile("(\s|^)#(fortune|factcheck|8ball|roll([1-9][0-9]*)|coinflip)", flags=re.A|re.I) def command_regex_matcher(match): if match.group(2) == 'coinflip' and random.random() < 0.01: result = COINFLIP_EDGE else: if match.group(2).startswith('roll'): max_num = int(match.group(3)) result = random.randint(1, max_num) color = tuple(random.choices(range(256), k=3)) result = f'Your roll (1-{max_num}): {result}' else: result = str(random.choice(commands[match.group(2).lower()])) return match.group(1) + result reason_regex_post = re.compile('(/post/[0-9]+)', flags=re.A) reason_regex_comment = re.compile('(/comment/[0-9]+)', flags=re.A) numbered_list_regex = re.compile('((\n|^)>*[0-9]+)[\.)] ', flags=re.A) image_link_regex = re.compile(f"https:\/\/(i\.)?{SITE}\/(chat_)?images\/[0-9]{{11,17}}r?\.webp", flags=re.A) video_link_regex = re.compile(f"https://(videos\.)?{SITE}\/(videos\/)?[0-9a-zA-Z._-]{{4,66}}\.({video_regex_extensions})", flags=re.A) asset_image_link_regex = re.compile(f"https:\/\/(i\.)?{SITE}\/assets\/images\/[\w\/]+.webp(\?x=\d+)?", flags=re.A) search_regex_1 = re.compile(r'[\0():|&*!<>]', flags=re.A) search_regex_2 = re.compile(r"'", flags=re.A) search_regex_3 = re.compile(r'\s+', flags=re.A) ###OWOIFY owo_word_regex = re.compile(r'[^\s]+', flags=re.A) owo_space_regex = re.compile(r'\s+', flags=re.A) owo_ignore_links_images_regex = re.compile(r'\]\(', flags=re.A) 
owo_ignore_emojis_regex = re.compile(r':[!#@a-z0-9_\-]+:', flags=re.I|re.A)
owo_ignore_the_Regex = re.compile(r'\bthe\b', flags=re.I|re.A)

###LinkifyFilter
# Top-level domains accepted by sanitize_url_regex below.
tlds = (
    # Original gTLDs and ccTLDs
    'ac','ad','ae','aero','af','ag','ai','al','am','an','ao','aq','ar','arpa','as','asia','at',
    'au','aw','ax','az','ba','bb','bd','be','bf','bg','bh','bi','biz','bj','bm','bn','bo','br',
    'bs','bt','bv','bw','by','bz','ca','cafe','cat','cc','cd','cf','cg','ch','ci','ck','cl',
    'cm','cn','co','com','coop','cr','cu','cv','cx','cy','cz','de','dj','dk','dm','do','dz','ec',
    'edu','ee','eg','er','es','et','eu','fi','fj','fk','fm','fo','fr','ga','gb','gd','ge','gf',
    'gg','gh','gi','gl','gm','gn','gov','gp','gq','gr','gs','gt','gu','gw','gy','hk','hm','hn',
    'hr','ht','hu','id','ie','il','im','in','info','int','io','iq','ir','is','it','je','jm','jo',
    'jobs','jp','ke','kg','kh','ki','km','kn','kp','kr','kw','ky','kz','la','lb','lc','li','lk',
    'lr','ls','lt','lu','lv','ly','ma','mc','md','me','mg','mh','mil','mk','ml','mm','mn','mo',
    'mobi','mp','mq','mr','ms','mt','mu','museum','mv','mw','mx','my','mz','na','name',
    'nc','ne','net','nf','ng','ni','nl','no','np','nr','nu','nz','om','org','pa','pe','pf','pg',
    'ph','pk','pl','pm','pn','post','pr','pro','ps','pt','pw','qa','re','ro','rs','ru','rw',
    'sa','sb','sc','sd','se','sg','sh','si','sj','sk','sl','sm','sn','so','social','sr','ss','st',
    'su','sv','sx','sy','sz','tc','td','tel','tf','tg','th','tj','tk','tl','tm','tn','to','tp',
    'tr','travel','tt','tv','tw','tz','ua','ug','uk','us','uy','uz','va','vc','ve','vg','vi','vn',
    'vu','wf','ws','xn','xxx','ye','yt','yu','za','zm','zw',
    # New gTLDs ('gl' is already listed above, so it is not repeated here)
    'app','cleaning','club','dev','farm','florist','fun','gay','lgbt','life','lol',
    'moe','mom','monster','new','news','one','online','pics','press','pub','site','blog',
    'vip','win','world','wtf','xyz','video','host','art','media','wiki','tech',
    'cooking','network','party','goog','markets','today','beauty','camp','top',
    'red','city','quest','works','soy','zone',
)

protocols = ('http', 'https')

# Verbose-mode URL matcher; {0} receives the protocol alternation and {1} the
# TLD alternation. Literal braces are doubled because of str.format.
#
# NOTE(review): the first three pattern lines below were RECONSTRUCTED. The
# text previously here had no {0}/{1} placeholders left for .format() to fill,
# and in verbose mode reduced to just "\(*". The reconstruction follows the
# well-known bleach linkifier URL pattern, which the surviving fragments
# (opening-parenthesis prefix, "/path/zz" and "#hash" comments, trailing
# character class) match. Confirm against version control before merging.
sanitize_url_regex = re.compile(
    r"""\(*  # Match any opening parentheses.
    \b(?<![@.])(?:(?:{0}):/{{0,3}}(?:(?:\w+:)?\w+@)?)?  # http://
    ([\w-]+\.)+(?:{1})(?:\:[0-9]+)?(?!\.\w)\b  # xx.yy.tld(:##)?
    (?:[/?][^\s\{{\}}\|\\\^\[\]`<>"]*)?
        # /path/zz (excluding "unsafe" chars from RFC 1738,
        # except for ~, which happens in practice)
    (?:\#[^#\s\|\\\^\[\]`<>"]*)?
        # #hash (excluding "unsafe" chars from RFC 1738,
        # except for ~, which happens in practice)
    """.format(
        "|".join(sorted(protocols)), "|".join(sorted(tlds))
    ),
    re.X | re.U,
)

###REDDIT
# Host-name dots in the patterns below are escaped so that "." only matches a
# literal dot rather than any character.

#sanitizing
reddit_mention_regex = re.compile(r'(^|[>\s])\/?(r|u)(\/[\w-]{2,25})' + NOT_IN_CODE_OR_LINKS, flags=re.I|re.A)
reddit_domain_regex = re.compile(r"(^|\s|\()https?:\/\/(redd\.it\/|((www\.|new\.)?reddit\.com|redd\.it)\/(u\/(?![\w-]{2,25}\/s\/)|user\/|(r\/\w{2,25}\/)?comments\/|r\/\w{2,25}\/?$))", flags=re.A)
reddit_comment_link_regex = re.compile(r"https:\/\/old\.reddit\.com\/r\/\w{2,25}\/comments(\/\w+){3}\/?.*", flags=re.A)

#gevent
reddit_s_url_regex = re.compile(r"https:\/\/(www\.)?reddit\.com\/(r|u|user)\/[\w-]{2,25}\/s\/\w{10}\/?", flags=re.A)
tiktok_t_url_regex = re.compile(r"https:\/\/(www\.|vm\.)?tiktok\.com(\/t)?\/\w{9}\/?", flags=re.A)

#run-time
reddit_to_vreddit_regex = re.compile(r'(^|>|")https:\/\/old\.reddit\.com\/(r|u|user)\/', flags=re.A)

#post search
subreddit_name_regex = re.compile(r'\w{2,25}', flags=re.A)

###YOUTUBE

#sanitize
# NOTE(review): this pattern ends with a closing anchor tag but contains no
# opening one, so the start of the literal may have been lost -- confirm
# against version control. The visible pattern is kept unchanged.
youtube_regex = re.compile(r'(https:\/\/)?youtube\.com\/watch\?v=[\w-]{11}[\w&;=-]*<\/a>' + NOT_IN_CODE_OR_LINKS, flags=re.I|re.A)

#sanitize and song
yt_id_regex = re.compile(r'[\w-]{11}', flags=re.A)

#orgy
bare_youtube_regex = re.compile(r'https:\/\/youtube\.com\/watch\?v=[\w-]{11}[\w&;=]*', flags=re.I|re.A)