MarseyWorld/files/helpers/regex.py

285 lines
14 KiB
Python
Raw Normal View History

import random
import re
2024-04-06 07:36:01 +00:00
from flask import g
[DO NOT MERGE] import detanglation (#442) * move Base definition to files.classes.__init__.py * fix ImportError * move userpage listing to users.py * don't import the app from classes * consts: set default values to avoid crashes consts: warn if the secret key is the default config value * card view: sneed (user db schema) * cloudflare: use DEFAULT_CONFIG_VALUE * const: set default values * decouple media.py from __main__ * pass database to avoid imports * import cleanup and import request not in const, but in the requests mega import * move asset_submissions site check to __init__ * asset submissions feature flag * flag * g.is_tor * don't import request where it's not needed * i think this is fine * mail: move to own routes and helper * wrappers * required wrappers move * unfuck wrappers a bit * move snappy quotes and marseys to stateful consts * marsify * :pepodrool: * fix missing import * import cache * ...and settings.py * and static.py * static needs cache * route * lmao all of the jinja shit was in feeds.py amazing * classes should only import what they need from flask * import Response * hdjbjdhbhjf * ... * dfdfdfdf * make get a non-required import * isort imports (mostly) * but actually * configs * reload config on import * fgfgfgfg * config * config * initialize snappy and test * cookie of doom debug * edfjnkf * xikscdfd * debug config * set session cookie domain, i think this fixes the can't login bug * sdfbgnhvfdsghbnjfbdvvfghnn * hrsfxgf * dump the entire config on a request * kyskyskyskyskyskyskyskyskys * duifhdskfjdfd * dfdfdfdfdfdfdfdfdfdfdfdf * dfdfdfdf * imoprt all of the consts beacuse fuck it * 😭 * dfdfdfdfdfdfsdasdf * print the entire session * rffdfdfjkfksj * fgbhffh * not the secret keys * minor bug fixes * be helpful in the warning * gfgfgfg * move warning lower * isort main imports (i hope this doesn't fuck something up) * test * session cookie domain redux * dfdfdfd * try only importing Flask * formkeys fix * y * :pepodrool: * route helper * remove before flight * dfdfdfdfdf * isort classes * isort helpers * move check_for_alts to routehelpers and also sort imports and get rid of unused ones * that previous commit but actkally * readd the cache in a dozen places they were implicitly imported * use g.is_tor instead of request.headers. bla bla bla * upgrade streamers to their own route file * get rid of unused imports in __main__ * fgfgf * don't pull in the entire ORM where we don't need it * features * explicit imports for the get helper * explicit imports for the get helper redux * testing allroutes * remove unused import * decouple flask from classes * syntax fix also remember these have side fx for some reason (why?) * move side effects out of the class * posts * testing on devrama * settings * reloading * settingssdsdsds * streamer features * site settings * testing settings on devrama * import * fix modlog * remove debug stuff * revert commit 67275b21ab6e2f2520819e84d10bfc1c746a15b6 * archiveorg to _archiveorg * skhudkfkjfd * fix cron for PCM * fix bugs that snekky wants me to * Fix call to realbody passing db, standardize kwarg * test * import check_for_alts from the right place * cloudflare * testing on devrama * fix cron i think * shadow properly * tasks * Remove print which will surely be annoying in prod. * v and create new session * use files.classes * make errors import little and fix rare 500 in /allow_nsfw * Revert "use files.classes" This reverts commit 98c10b876cf86ce058b7fb955cf1ec0bfb9996c6. * pass v to media functions rather than using g * fix * dfdfdfdfd * cleanup, py type checking is dumb so don't use it where it causes issues * Fix some merge bugs, add DEFAULT_RATELIMIT to main. * Fix imports on sqlalchemy expressions. * `from random import random` is an error. * Fix replies db param. * errors: fix missing import * fix rare 500: only send to GIFT_NOTIF_ID if it exists, and send them the right text * Fix signup formkey. * fix 2 500s * propagate db to submissions * fix replies * dfdfdfdf * Fix verifiedcolor. * is_manual * can't use getters outside of an app context * don't attempt to do gumroad on sites where it's not enabled * don't attempt to do gumraod on sites's where it's unnecessary * Revert "don't attempt to do gumroad on sites where it's not enabled" This reverts commit 6f8a6331878655492dfaf1907b27f8be513c14d3. * fix 500 * validate media type Co-authored-by: TLSM <duolsm@outlook.com>
2022-11-15 09:19:08 +00:00
from files.classes.media import Media
from .config.const import *
NOT_IN_CODE_OR_LINKS = '(?!([^<]*<\/(code|pre|a)>|[^`\n]*`))'
2023-09-15 01:24:16 +00:00
valid_username_regex = re.compile("^[\w-]{3,25}$", flags=re.A)
valid_username_patron_regex = re.compile("^[\w-]{1,25}$", flags=re.A)
2023-03-11 05:07:10 +00:00
2023-09-15 01:24:16 +00:00
mention_regex = re.compile('(?<![:/\w])@([\w-]{1,30})' + NOT_IN_CODE_OR_LINKS, flags=re.A)
group_mention_regex = re.compile('(?<![:/\w])!([\w-]{3,25})' + NOT_IN_CODE_OR_LINKS, flags=re.A|re.I)
chat_adding_regex = re.compile('\+@([\w-]{1,30})' + NOT_IN_CODE_OR_LINKS, flags=re.A)
chat_kicking_regex = re.compile('\-@([\w-]{1,30})' + NOT_IN_CODE_OR_LINKS, flags=re.A)
2024-03-10 14:27:21 +00:00
everyone_regex = re.compile('(^|\s|>)!(everyone)' + NOT_IN_CODE_OR_LINKS, flags=re.A)
2023-03-01 05:32:19 +00:00
valid_password_regex = re.compile("^.{8,100}$", flags=re.A)
marseyaward_body_regex = re.compile(">[^<\s+]|[^>\s+]<", flags=re.A)
marseyaward_title_regex = re.compile("( *<img[^>]+>)+", flags=re.A)
2023-03-18 14:53:00 +00:00
emoji_name_regex = re.compile("[a-z0-9]{1,30}", flags=re.A)
2022-09-10 05:37:11 +00:00
tags_regex = re.compile("[a-z0-9: ]{1,200}", flags=re.A)
2023-10-17 17:22:05 +00:00
hat_name_regex = re.compile("[\w\-() ,]{1,50}", flags=re.A)
2022-09-10 05:37:11 +00:00
description_regex = re.compile("[^<>&\n\t]{1,300}", flags=re.A)
2023-03-09 22:35:45 +00:00
badge_name_regex = re.compile(r"[^\/.]+", flags=re.A)
2022-12-30 16:28:24 +00:00
2023-09-15 01:24:16 +00:00
hole_group_name_regex = re.compile("^[\w-]{3,25}$", flags=re.A)
query_regex = re.compile("(\w+):(\S+)", flags=re.A)
2023-10-17 09:15:07 +00:00
poll_regex = re.compile("(^|\n|>)\$\$([^\n]+?)\$\$\s*?" + NOT_IN_CODE_OR_LINKS, flags=re.A)
bet_regex = re.compile("(^|\n|>)##([^\n]+?)##\s*?" + NOT_IN_CODE_OR_LINKS, flags=re.A)
2023-10-17 09:14:39 +00:00
choice_regex = re.compile("(^|\n|>)&&([^\n]+?)&&\s*?" + NOT_IN_CODE_OR_LINKS, flags=re.A)
html_comment_regex = re.compile("<!--.*-->", flags=re.A)
title_regex = re.compile("[^\w ]", flags=re.A)
2024-02-29 14:56:24 +00:00
controversial_regex = re.compile('https:\/\/old\.reddit\.com\/r\/\w{2,20}\/comments\/[\w\-.#&/=\?@%+]{5,250}', flags=re.A)
spoiler_regex = re.compile('\|\|(.+?)\|\|' + NOT_IN_CODE_OR_LINKS, flags=re.A)
hole_mention_regex = re.compile('(?<![\w/])\/?([hH]\/[\w-]{3,25})' + NOT_IN_CODE_OR_LINKS, flags=re.A)
strikethrough_regex = re.compile('~{1,2}([^~]+)~{1,2}' + NOT_IN_CODE_OR_LINKS, flags=re.A)
2023-09-15 01:24:16 +00:00
mute_regex = re.compile("\/mute @?([\w-]{1,30}) ([0-9]+)", flags=re.A|re.I)
2024-03-31 00:26:35 +00:00
unmute_regex = re.compile("\/unmute @?([\w-]{1,30})", flags=re.A|re.I)
emoji_regex = re.compile(f"<p>\s*(:[!#@\w\-]{{1,72}}:\s*)+<\/p>", flags=re.A)
emoji_regex2 = re.compile(f'(?<!"):([!#@\w\-]{{1,72}}?):(?![^<]*<\/(code|pre)>)', flags=re.A)
snappy_url_regex = re.compile('<a href="(https?:\/\/.+?)".*?>(.+?)<\/a>', flags=re.A)
2023-08-14 11:56:55 +00:00
email_regex = re.compile('[A-Za-z0-9._%+-]{1,64}@[A-Za-z0-9.-]{2,63}\.[A-Za-z]{2,63}', flags=re.A)
torture_regex = re.compile('(^|\s)(i|me)($|\s)', flags=re.I|re.A)
2023-10-29 14:41:52 +00:00
torture_regex2 = re.compile("(^|\s)(i'?m)($|\s)", flags=re.I|re.A)
torture_regex3 = re.compile("(^|\s)(my|mine)($|\s)", flags=re.I|re.A)
#matches ". ", does not match "..." or a.b
sentence_ending_regex = re.compile('(?<!\.)(\.)(?=$|\n|\s)', flags=re.I|re.A)
Add the "Misogynist" award to harass incels (#154) Whazzup? This PR is the final solution to the incel problem. There's an old indian proverb that says: "never judge a man until you've walked two moons in his mocassins". In this case, it should be: "never judge a woman until you've walked 24 hrs in her high-heels". The misogynist award is a comment-transforming award that "feminizes" comments. It does the following: - makes text pink - makes text lowercase - removes "complicated" punctuation - makes paragraphs into run-on sentences - adds stereotypical girly remarks to the beginning or end of a paragraph. For example: INPUT > What the fuck did you just fucking say about me, you little bitch? I'll have you know I graduated top of my class in the Navy Seals, and I've been involved in numerous secret raids on Al-Quaeda, and I have over 300 confirmed kills. I am trained in gorilla warfare and I'm the top sniper in the entire US armed forces. You are nothing to me but just another target. I will wipe you the fuck out with precision the likes of which has never been seen before on this Earth, mark my fucking words. You think you can get away with saying that shit to me over the Internet? Think again, fucker. As we speak I am contacting my secret network of spies across the USA and your IP is being traced right now so you better prepare for the storm, maggot. The storm that wipes out the pathetic little thing you call your life. You're fucking dead, kid. I can be anywhere, anytime, and I can kill you in over seven hundred ways, and that's just with my bare hands. Not only am I extensively trained in unarmed combat, but I have access to the entire arsenal of the United States Marine Corps and I will use it to its full extent to wipe your miserable ass off the face of the continent, you little shit. If only you could have known what unholy retribution your little "clever" comment was about to bring down upon you, maybe you would have held your fucking tongue. But you couldn't, you didn't, and now you're paying the price, you goddamn idiot. I will shit fury all over you and you will drown in it. You're fucking dead, kiddo. OUTPUT > im literally screaming, what the fuck did you just fucking say about me, you little bitch? ill have you know i graduated top of my class in the navy seals, and ive been involved in numerous secret raids on al-quaeda, and i have over 300 confirmed kills, i am trained in gorilla warfare and im the top sniper in the entire us armed forces, you are nothing to me but just another target, i will wipe you the fuck out with precision the likes of which has never been seen before on this earth, mark my fucking words, you think you can get away with saying that shit to me over the internet? think again, fucker, as we speak i am contacting my secret network of spies across the usa and your ip is being traced right now so you better prepare for the storm, maggot, the storm that wipes out the pathetic little thing you call your life, youre fucking dead, kid, i can be anywhere, anytime, and i can kill you in over seven hundred ways, and thats just with my bare hands, not only am i extensively trained in unarmed combat, but i have access to the entire arsenal of the united states marine corps and i will use it to its full extent to wipe your miserable ass off the face of the continent, you little shit, if only you could have known what unholy retribution your little clever comment was about to bring down upon you, maybe you would have held your fucking tongue, but you couldnt, you didnt, and now youre paying the price, you goddamn idiot, i will shit fury all over you and you will drown in it, youre fucking dead, kiddo It also sets the user's pfp to a random white woman. Well, psuedorandom - it's based off of the user's id, so each user will only ever have one pfp assigned to them, which I think is nifty. Finally, it changes the name of the user toa girly name. There is one small problem with the PR, which is simply that I manually added a badge for testing purposes. If you like this PR, I will submit the badge throught the proper chanels and fix it. ![image](/attachments/641c7276-ffe4-4e69-b3e9-aec9f4f94191) Co-authored-by: Chuck Sneed <sneed@formerlychucks.net> Reviewed-on: https://fsdfsd.net/rDrama/rDrama/pulls/154 Co-authored-by: HeyMoon <heymoon@noreply.fsdfsd.net> Co-committed-by: HeyMoon <heymoon@noreply.fsdfsd.net>
2023-06-21 12:36:07 +00:00
normal_punctuation_regex = re.compile('(\"|\')', flags=re.I|re.A)
more_than_one_comma_regex = re.compile('\,\,+', flags=re.I|re.A)
#matches the various superlatives, but only if it as the start or end of a string or if it surrounded by spaces or is at the end of a word.
superlative_regex = re.compile('(?<=^|(?<=\s))(everyone|everybody|nobody|all|none|every|any|no one|anything)(?=$|\n|\s|[.?!,])', flags=re.I|re.A)
#like above, except only when totally doesn't already prefix
totally_regex = re.compile('(?<=^|(?<=\s))(?<!totally )(into)(?=$|\n|\s|[.?!,])', flags=re.I|re.A)
greeting_regex = re.compile('(?<=^|(?<=\s))(hello|hi|hey|hecko)(?=$|\n|\s|[.?!,])', flags=re.I|re.A)
like_before_regex = re.compile('(?<=^|(?<=\s))(?<!like )(just|only)(?=$|\n|\s|[.?!,])', flags=re.I|re.A)
like_after_regex = re.compile('(?<=^|(?<=\s))(i mean)(?! like)(?=$|\n|\s|[.?!,])', flags=re.I|re.A)
#match ! or ? but only if it isn't touching another ! or ?, or is in front of a letter
single_repeatable_punctuation = re.compile('(?<!!|\?)(!|\?)(?!!|\?)(?=\s|$)', flags=re.I|re.A)
#match "redpilled", to turn into "goodpilled" (extremely jankpilled but its whatever). Group 2 contained "ed" if exists
redpilled_regex = re.compile('(?<=^|(?<=\s))(redpill(ed)*)(?=$|\n|\s|[.?!,])', flags=re.I|re.A)
#match "based and Xpilled". To be turned into "comfy X vibes". Note that "X" is in group 2. No conditional "ed", "ed" will always be present
based_and_x_pilled_regex = re.compile('(?<=^|(?<=\s))(based and ([a-zA-Z]*)pilled)(?=$|\n|\s|[.?!,])', flags=re.I|re.A)
#match "based" to "comfy"
based_regex = re.compile('(?<=^|(?<=\s))(based)(?=$|\n|\s|[.?!,])', flags=re.I|re.A)
#match "Xpilled". To be turned into "X vibes". Note that "X" is in group 2, "ed" in group 3
x_pilled_regex = re.compile('(?<=^|(?<=\s))(([a-zA-Z]+)pill(ed)?)(?=$|\n|\s|[.?!,])', flags=re.I|re.A)
#match "Xmaxxx". To be turned into "normalize good Xs". Note that "X" is in group 2, "s" (after X) in group 3
xmax_regex = re.compile('(?<=^|(?<=\s))(([a-zA-Z]+?)(s)?max+)(?=$|\n|\s|[.?!,])', flags=re.I|re.A)
#same as above, except "Xmaxxed" this time (b/c I have crippling OCD and "normalized" isn't "normalize" + "ed") :marseyrage:
xmaxed_regex = re.compile('(?<=^|(?<=\s))(([a-zA-Z]+?)(s)?max+ed)(?=$|\n|\s|[.?!,])', flags=re.I|re.A)
#same as above, except "Xmaxxing" this time
xmaxing_regex = re.compile('(?<=^|(?<=\s))(([a-zA-Z]+?)(s)?max+ing)(?=$|\n|\s|[.?!,])', flags=re.I|re.A)
initial_part_regex = re.compile('(?<=^)(>+)', flags=re.I|re.A)
Add the "Misogynist" award to harass incels (#154) Whazzup? This PR is the final solution to the incel problem. There's an old indian proverb that says: "never judge a man until you've walked two moons in his mocassins". In this case, it should be: "never judge a woman until you've walked 24 hrs in her high-heels". The misogynist award is a comment-transforming award that "feminizes" comments. It does the following: - makes text pink - makes text lowercase - removes "complicated" punctuation - makes paragraphs into run-on sentences - adds stereotypical girly remarks to the beginning or end of a paragraph. For example: INPUT > What the fuck did you just fucking say about me, you little bitch? I'll have you know I graduated top of my class in the Navy Seals, and I've been involved in numerous secret raids on Al-Quaeda, and I have over 300 confirmed kills. I am trained in gorilla warfare and I'm the top sniper in the entire US armed forces. You are nothing to me but just another target. I will wipe you the fuck out with precision the likes of which has never been seen before on this Earth, mark my fucking words. You think you can get away with saying that shit to me over the Internet? Think again, fucker. As we speak I am contacting my secret network of spies across the USA and your IP is being traced right now so you better prepare for the storm, maggot. The storm that wipes out the pathetic little thing you call your life. You're fucking dead, kid. I can be anywhere, anytime, and I can kill you in over seven hundred ways, and that's just with my bare hands. Not only am I extensively trained in unarmed combat, but I have access to the entire arsenal of the United States Marine Corps and I will use it to its full extent to wipe your miserable ass off the face of the continent, you little shit. If only you could have known what unholy retribution your little "clever" comment was about to bring down upon you, maybe you would have held your fucking tongue. But you couldn't, you didn't, and now you're paying the price, you goddamn idiot. I will shit fury all over you and you will drown in it. You're fucking dead, kiddo. OUTPUT > im literally screaming, what the fuck did you just fucking say about me, you little bitch? ill have you know i graduated top of my class in the navy seals, and ive been involved in numerous secret raids on al-quaeda, and i have over 300 confirmed kills, i am trained in gorilla warfare and im the top sniper in the entire us armed forces, you are nothing to me but just another target, i will wipe you the fuck out with precision the likes of which has never been seen before on this earth, mark my fucking words, you think you can get away with saying that shit to me over the internet? think again, fucker, as we speak i am contacting my secret network of spies across the usa and your ip is being traced right now so you better prepare for the storm, maggot, the storm that wipes out the pathetic little thing you call your life, youre fucking dead, kid, i can be anywhere, anytime, and i can kill you in over seven hundred ways, and thats just with my bare hands, not only am i extensively trained in unarmed combat, but i have access to the entire arsenal of the united states marine corps and i will use it to its full extent to wipe your miserable ass off the face of the continent, you little shit, if only you could have known what unholy retribution your little clever comment was about to bring down upon you, maybe you would have held your fucking tongue, but you couldnt, you didnt, and now youre paying the price, you goddamn idiot, i will shit fury all over you and you will drown in it, youre fucking dead, kiddo It also sets the user's pfp to a random white woman. Well, psuedorandom - it's based off of the user's id, so each user will only ever have one pfp assigned to them, which I think is nifty. Finally, it changes the name of the user toa girly name. There is one small problem with the PR, which is simply that I manually added a badge for testing purposes. If you like this PR, I will submit the badge throught the proper chanels and fix it. ![image](/attachments/641c7276-ffe4-4e69-b3e9-aec9f4f94191) Co-authored-by: Chuck Sneed <sneed@formerlychucks.net> Reviewed-on: https://fsdfsd.net/rDrama/rDrama/pulls/154 Co-authored-by: HeyMoon <heymoon@noreply.fsdfsd.net> Co-committed-by: HeyMoon <heymoon@noreply.fsdfsd.net>
2023-06-21 12:36:07 +00:00
#matches "the" or is, but only if it is not followed by "fucking". https://regex101.com/r/yxuYsQ/2
the_fucking_regex = re.compile('(?<=^|(?<=\s))((?:the|a)( (?:only))?|((that )?(?:is|are|was|were|will be|would be)( (?:your|her|his|their|no|a|not|to|too|so|this|the|our|what))?( (a|the))?)|is)(?=\s)(?! fucking)' + NOT_IN_CODE_OR_LINKS, flags=re.I|re.A)
#matches a single question mark but only if it isn't preceded by ", bitch"
bitch_question_mark_regex = re.compile('(?<!\?|\!)(?<!, bitch)(\?)(?!!|\?)(?=\s|$)' + NOT_IN_CODE_OR_LINKS, flags=re.I|re.A)
#matches a single exclamation point but only if it isn't preceded by ", motherfucker"
exclamation_point_regex = re.compile('(?<!!|\?)(?<!, motherfucker)(!)(?!!|\?)(?=\s|$)' + NOT_IN_CODE_OR_LINKS, flags=re.I|re.A)
image_check_regex = re.compile(f'!\[\]\(((?!(https:\/\/({hosts})\/|\/)).*?)\)', flags=re.A)
2022-10-30 14:55:43 +00:00
video_regex_extensions = '|'.join(VIDEO_FORMATS)
video_sub_regex = re.compile(f'(?<!")(https:\/\/({hosts})\/[\w:~,()\-.#&\/=?@%;+]*?\.({video_regex_extensions}))' + NOT_IN_CODE_OR_LINKS, flags=re.A)
2022-10-30 14:55:43 +00:00
2024-04-06 07:36:01 +00:00
def video_sub_regex_matcher(match):
url = match.group(1)
if url.startswith(SITE_FULL_VIDEOS):
filename = '/videos/' + url.split(f'{SITE_FULL_VIDEOS}/')[1]
g.db.flush()
posterurl = g.db.query(Media.posterurl).filter_by(filename=filename).one_or_none()
2024-04-06 07:53:38 +00:00
if posterurl:
return f'<p class="resizable"><video poster="{posterurl[0]}" controls preload="none" src="{url}"></video></p>'
2024-04-06 07:36:01 +00:00
return f'<p class="resizable"><video controls preload="none" src="{url}"></video></p>'
2022-10-30 14:55:43 +00:00
audio_regex_extensions = '|'.join(AUDIO_FORMATS)
audio_sub_regex = re.compile(f'(?<!")(https:\/\/({hosts})\/[\w:~,()\-.#&\/=?@%;+]*?\.({audio_regex_extensions}))' + NOT_IN_CODE_OR_LINKS, flags=re.A)
2022-10-30 14:55:43 +00:00
image_regex_extensions = '|'.join(IMAGE_FORMATS)
image_sub_regex = re.compile(f'(?<!")(https:\/\/[\w\-.#&/=\?@%;+,:]{{5,250}}(\.|\?format=)({image_regex_extensions})((\?|&)[\w\-.#&/=\?@%;+,:]*)?)(?=$|\s|<)' + NOT_IN_CODE_OR_LINKS, flags=re.I|re.A)
2023-03-07 01:40:48 +00:00
image_regex_extensions_no_gif = image_regex_extensions.replace('|gif', '')
imgur_regex = re.compile(f'^(https:\/\/i\.imgur\.com\/[a-z0-9]+)\.({image_regex_extensions_no_gif})', flags=re.I|re.A)
giphy_regex = re.compile('(https:\/\/media\.giphy\.com\/media\/[a-z0-9]+\/giphy)\.gif', flags=re.I|re.A)
2023-08-16 19:07:33 +00:00
rumble_regex = re.compile('https://rumble\.com/embed/([a-zA-Z0-9]*)(/\?pub=([a-zA-Z0-9]*))?', flags=re.I|re.A)
2023-08-16 19:07:33 +00:00
twitch_regex = re.compile('(https:\/\/)?(www\.)?twitch.tv\/(.*)', flags=re.I|re.A)
link_fix_regex = re.compile("(\[.*?\]\()(?!http|\/)(.*?\))" + NOT_IN_CODE_OR_LINKS, flags=re.A)
2023-09-15 01:40:53 +00:00
css_url_regex = re.compile('url\([\'"]?((.|\n)*?)[",);}$]', flags=re.I|re.A) # AEVANN, DO NOT TOUCH THIS, IT WENT THROUGH A MILLION ITERATIONS, IT'S PERFECT NOW
linefeeds_regex = re.compile("([^\n])\n([^\n])", flags=re.A)
greentext_regex = re.compile("(\n|^)>([^ >][^\n]*)", flags=re.A)
allowed_domain_regex = re.compile("[a-z0-9\-.]+", flags=re.I|re.A)
twitter_domain_regex = re.compile('(^|>|")https:\/\/twitter.com\/(?!i\/)', flags=re.A)
2024-02-08 00:36:54 +00:00
instagram_to_imgsed_regex = re.compile('(^|>|")https:\/\/instagram.com\/(?!reel\/)', flags=re.A)
2022-11-07 00:19:13 +00:00
color_regex = re.compile("[a-f0-9]{6}", flags=re.A)
2022-06-24 17:32:31 +00:00
# lazy match on the .*?, only match if there is trailing stuff
# Specifically match Snappy's way of formatting, this might break some losers' comments.
showmore_regex = re.compile(r"^(.*?</p>(?:</li></ul>)?)(\s*<p>.*)", flags=re.A|re.DOTALL)
2022-07-06 11:49:13 +00:00
search_token_regex = re.compile('"([^"]*)"|(\S+)', flags=re.A)
git_regex = re.compile("ref: (refs/.+)", flags=re.A)
pronouns_regex = re.compile("(\w{1,7})\/\w{1,7}(\/\w{1,7})?", flags=re.A)
2022-07-11 16:46:08 +00:00
2022-11-11 09:49:43 +00:00
html_title_regex = re.compile("<title>(.{1,200})</title>", flags=re.I)
2024-04-20 20:18:57 +00:00
excessive_css_scale_regex = re.compile("scale\([^)]*?(\d{2})", flags=re.A)
2024-04-25 23:27:06 +00:00
word_alert_regex = re.compile(r'\b(764|o9a|9 angles|hurtcore|pthc)\b', flags=re.A|re.I)
2022-07-11 12:14:18 +00:00
commands = {
"fortune": FORTUNE_REPLIES,
"factcheck": FACTCHECK_REPLIES,
2022-07-13 13:06:59 +00:00
"8ball": EIGHTBALL_REPLIES,
2023-08-06 08:07:48 +00:00
"coinflip": COINFLIP_HEADS_OR_TAILS,
2022-07-11 12:14:18 +00:00
}
2024-02-12 17:02:12 +00:00
command_regex = re.compile("(\s|^)#(fortune|factcheck|8ball|roll([1-9][0-9]*)|coinflip)", flags=re.A|re.I)
2022-07-11 12:14:18 +00:00
2024-02-29 15:05:15 +00:00
def command_regex_matcher(match):
2023-08-07 13:11:10 +00:00
if match.group(2) == 'coinflip' and random.random() < 0.01:
2023-08-06 08:07:48 +00:00
result = COINFLIP_EDGE
else:
2024-02-12 16:13:26 +00:00
if match.group(2).startswith('roll'):
max_num = int(match.group(3))
2024-02-21 20:08:33 +00:00
result = random.randint(1, max_num)
color = tuple(random.choices(range(256), k=3))
2024-02-12 16:13:26 +00:00
result = f'<b style="color:rgb{color}">Your roll (1-{max_num}): {result}</b>'
else:
2024-02-21 20:08:33 +00:00
result = str(random.choice(commands[match.group(2).lower()]))
2022-08-30 21:19:53 +00:00
return match.group(1) + result
2022-12-30 16:10:29 +00:00
reason_regex_post = re.compile('(/post/[0-9]+)', flags=re.A)
reason_regex_comment = re.compile('(/comment/[0-9]+)', flags=re.A)
2023-01-23 07:38:16 +00:00
numbered_list_regex = re.compile('((\n|^)[> ]*[0-9]+)[\.)] ', flags=re.A)
2023-02-18 21:03:23 +00:00
2023-08-16 23:48:15 +00:00
image_link_regex = re.compile(f"https:\/\/(i\.)?{SITE}\/(chat_)?images\/[0-9]{{11,17}}r?\.webp", flags=re.A)
2023-03-25 15:07:12 +00:00
2023-07-29 00:20:00 +00:00
video_link_regex = re.compile(f"https://(videos\.)?{SITE}\/(videos\/)?[0-9a-zA-Z._-]{{4,66}}\.({video_regex_extensions})", flags=re.A)
2023-08-17 00:07:13 +00:00
asset_image_link_regex = re.compile(f"https:\/\/(i\.)?{SITE}\/assets\/images\/[\w\/]+.webp(\?x=\d+)?", flags=re.A)
2023-09-08 18:00:04 +00:00
2023-10-05 16:14:17 +00:00
search_regex_1 = re.compile(r'[\0():|&*!<>]', flags=re.A)
search_regex_2 = re.compile(r"'", flags=re.A)
search_regex_3 = re.compile(r'\s+', flags=re.A)
###OWOIFY
owo_word_regex = re.compile(r'[^\s]+', flags=re.A)
owo_space_regex = re.compile(r'\s+', flags=re.A)
owo_ignore_links_images_regex = re.compile(r'\]\(', flags=re.A)
owo_ignore_emojis_regex = re.compile(r':[!#@a-z0-9_\-]+:', flags=re.I|re.A)
owo_ignore_the_Regex = re.compile(r'\bthe\b', flags=re.I|re.A)
###LinkifyFilter
tlds = ( # Original gTLDs and ccTLDs
'ac','ad','ae','aero','af','ag','ai','al','am','an','ao','aq','ar','arpa','as','asia','at',
'au','aw','ax','az','ba','bb','bd','be','bf','bg','bh','bi','biz','bj','bm','bn','bo','br',
'bs','bt','bv','bw','by','bz','ca','cafe','cat','cc','cd','cf','cg','ch','ci','ck','cl',
'cm','cn','co','com','coop','cr','cu','cv','cx','cy','cz','de','dj','dk','dm','do','dz','ec',
'edu','ee','eg','er','es','et','eu','fi','fj','fk','fm','fo','fr','ga','gb','gd','ge','gf',
'gg','gh','gi','gl','gm','gn','gov','gp','gq','gr','gs','gt','gu','gw','gy','hk','hm','hn',
'hr','ht','hu','id','ie','il','im','in','info','int','io','iq','ir','is','it','je','jm','jo',
'jobs','jp','ke','kg','kh','ki','km','kn','kp','kr','kw','ky','kz','la','lb','lc','li','lk',
'lr','ls','lt','lu','lv','ly','ma','mc','md','me','mg','mh','mil','mk','ml','mm','mn','mo',
'mobi','mp','mq','mr','ms','mt','mu','museum','mv','mw','mx','my','mz','na','name',
'nc','ne','net','nf','ng','ni','nl','no','np','nr','nu','nz','om','org','pa','pe','pf','pg',
2024-03-02 09:32:59 +00:00
'ph','pk','pl','pm','pn','post','pr','pro','ps','pt','pw','qa','re','ro','rs','ru','rw',
'sa','sb','sc','sd','se','sg','sh','si','sj','sk','sl','sm','sn','so','social','sr','ss','st',
'su','sv','sx','sy','sz','tc','td','tel','tf','tg','th','tj','tk','tl','tm','tn','to','tp',
'tr','travel','tt','tv','tw','tz','ua','ug','uk','us','uy','uz','va','vc','ve','vg','vi','vn',
'vu','wf','ws','xn','xxx','ye','yt','yu','za','zm','zw',
# New gTLDs
'app','cleaning','club','dev','farm','florist','fun','gay','lgbt','life','lol',
2024-02-16 19:34:21 +00:00
'moe','mom','monster','new','news','one','online','pics','press','pub','site','blog',
'vip','win','world','wtf','xyz','video','host','art','media','wiki','tech',
'cooking','network','party','goog','markets','today','beauty','camp','top',
2024-03-27 10:10:43 +00:00
'red','city','quest','works','soy','zone','gl',
)
protocols = ('http', 'https')
sanitize_url_regex = re.compile(
r"""\(*# Match any opening parentheses.
\b(?<![@.])(?:(?:{0}):/{{0,3}}(?:(?:\w+:)?\w+@)?)?# http://
([\w-]+\.)+(?:{1})(?:\:[0-9]+)?(?!\.\w)\b# xx.yy.tld(:##)?
(?:[/?][^#\s\{{\}}\|\\\^\[\]`<>"]*)?
# /path/zz (excluding "unsafe" chars from RFC 1738,
# except for ~, which happens in practice)
(?:\#[^#\s\|\\\^\[\]`<>"]*)?
# #hash (excluding "unsafe" chars from RFC 1738,
# except for ~, which happens in practice)
""".format(
"|".join(sorted(protocols)), "|".join(sorted(tlds))
),
re.X | re.U,
)
###REDDIT
2023-09-08 18:00:04 +00:00
#sanitizing
reddit_mention_regex = re.compile('(^|[>\s])\/?(r|u)(\/[\w-]{2,25})' + NOT_IN_CODE_OR_LINKS, flags=re.I|re.A)
2024-04-11 12:43:56 +00:00
reddit_domain_regex = re.compile("(^|\s|\()https?:\/\/(redd.it\/|((www\.|new\.)?reddit\.com|redd\.it)\/(u\/(?![\w-]{2,25}\/s\/)|user\/|(r\/\w{2,25}\/)?comments\/|r\/\w{2,25}\/?$))", flags=re.A)
reddit_comment_link_regex = re.compile("https:\/\/old.reddit.com\/r\/\w{2,25}\/comments(\/\w+){3}\/?.*", flags=re.A)
2023-09-08 18:00:04 +00:00
#gevent
reddit_s_url_regex = re.compile("https:\/\/(www\.)?reddit.com\/(r|u|user)\/[\w-]{2,25}\/s\/\w{10}\/?", flags=re.A)
2024-02-28 20:33:23 +00:00
tiktok_t_url_regex = re.compile("https:\/\/(www\.|vm\.)?tiktok.com(\/t)?\/\w{9}\/?", flags=re.A)
2023-09-08 18:00:04 +00:00
#run-time
reddit_to_vreddit_regex = re.compile('(^|>|")https:\/\/old.reddit.com\/(r|u|user)\/', flags=re.A)
#post search
subreddit_name_regex = re.compile('\w{2,25}', flags=re.A)
###YOUTUBE
#sanitize
2024-02-07 03:14:50 +00:00
youtube_regex = re.compile('<a href="(https:\/\/youtube\.com\/watch\?v=[\w-]{11}[\w&;=-]*)" rel="nofollow noopener" target="_blank">(https:\/\/)?youtube\.com\/watch\?v=[\w-]{11}[\w&;=-]*<\/a>' + NOT_IN_CODE_OR_LINKS, flags=re.I|re.A)
#sanitize and song
yt_id_regex = re.compile('[\w-]{11}', flags=re.A)
#orgy
bare_youtube_regex = re.compile('https:\/\/youtube\.com\/watch\?v=[\w-]{11}[\w&;=]*', flags=re.I|re.A)