2022-11-03 22:59:18 +00:00
|
|
|
import random
|
2022-06-24 14:30:59 +00:00
|
|
|
import re
|
2022-07-14 14:43:05 +00:00
|
|
|
from random import choice, choices
|
2022-11-15 09:19:08 +00:00
|
|
|
from typing import List, Optional, Union
|
|
|
|
|
2022-12-11 23:44:34 +00:00
|
|
|
from .config.const import *
|
2022-06-24 14:30:59 +00:00
|
|
|
|
2023-07-02 23:44:37 +00:00
|
|
|
# Negative lookahead appended to the inline-markup patterns in this module.
# It rejects a match when the text that follows runs into a closing </code>,
# </pre> or </a> tag, into a backtick on the same line, or into a ``` fence
# anywhere later in the text.
# (Raw strings are used so that regex escapes such as \w or \/ are not treated
# as invalid Python string escapes.)
NOT_IN_CODE_OR_LINKS = r'(?!([^<]*<\/(code|pre|a)>|[^`\n]*`|(.|\n)*```))'

# Whole-string check: 3-25 ASCII word characters or hyphens.
valid_username_regex = re.compile(r"^[\w\-]{3,25}$", flags=re.A)

# "@name" (1-30 chars) not directly preceded by "/" or a word character.
# Group 1 is the name without the "@".
mention_regex = re.compile(r'(?<![/\w])@([\w\-]{1,30})' + NOT_IN_CODE_OR_LINKS, flags=re.A)

# "!group" (3-25 chars), case-insensitive, same preceding-character rule as mentions.
# Group 1 is the group name without the "!".
group_mention_regex = re.compile(r'(?<![/\w])!([\w\-]{3,25})' + NOT_IN_CODE_OR_LINKS, flags=re.A|re.I)

# Literal "!everyone" at the start of the text or after whitespace / ">".
# Group 1 is the preceding delimiter, group 2 is "everyone".
everyone_regex = re.compile(r'(^|\s|>)!(everyone)' + NOT_IN_CODE_OR_LINKS, flags=re.A)
|
2023-03-01 05:32:19 +00:00
|
|
|
|
2022-06-24 14:30:59 +00:00
|
|
|
# Patterns below are written as raw strings so regex escapes (\w, \s, \- ...)
# are passed to `re` verbatim instead of being invalid Python string escapes.

# 8-100 characters of anything except a newline.
valid_password_regex = re.compile(r"^.{8,100}$", flags=re.A)

# Finds a character that sits directly next to a tag boundary and is not
# whitespace, "+" or another tag boundary, i.e. text content between tags.
marseyaward_body_regex = re.compile(r">[^<\s+]|[^>\s+]<", flags=re.A)

# One or more <img ...> tags, each optionally preceded by spaces.
marseyaward_title_regex = re.compile(r"( *<img[^>]+>)+", flags=re.A)

# 1-30 lowercase ASCII letters or digits.
emoji_name_regex = re.compile(r"[a-z0-9]{1,30}", flags=re.A)

# 1-200 lowercase letters, digits, colons or spaces.
tags_regex = re.compile(r"[a-z0-9: ]{1,200}", flags=re.A)

# 1-50 word characters, hyphens, parentheses, spaces or commas.
hat_regex = re.compile(r"[\w\-() ,]{1,50}", flags=re.A)

# 1-300 characters excluding "<", ">", "&", newline and tab.
description_regex = re.compile(r"[^<>&\n\t]{1,300}", flags=re.A)

# One or more characters that are neither "/" nor ".".
badge_name_regex = re.compile(r"[^\/.]+", flags=re.A)

# Whole-string check: 3-25 ASCII word characters or hyphens.
valid_sub_regex = re.compile(r"^[\w\-]{3,25}$", flags=re.A)

# "key:value" token; group 1 is the key, group 2 the non-whitespace value.
query_regex = re.compile(r"(\w+):(\S+)", flags=re.A)
|
|
|
|
|
2023-07-14 14:46:35 +00:00
|
|
|
# Inline option markers. Each pattern matches a marker pair at the start of the
# text, after a newline or after ">", and is suppressed inside code/links by
# NOT_IN_CODE_OR_LINKS. Group 1 is the leading delimiter, group 2 the option text.
# Raw strings keep \$ and \s from being invalid Python string escapes.

# $$option$$
poll_regex = re.compile(r"(^|\n|>)\$\$([^\$\n]+)\$\$\s*?" + NOT_IN_CODE_OR_LINKS, flags=re.A)

# ##option##
bet_regex = re.compile(r"(^|\n|>)##([^#\n]+)##\s*?" + NOT_IN_CODE_OR_LINKS, flags=re.A)

# &&option&&
choice_regex = re.compile(r"(^|\n|>)&&([^&\n]+)&&\s*?" + NOT_IN_CODE_OR_LINKS, flags=re.A)
|
2022-06-24 14:30:59 +00:00
|
|
|
|
2022-10-20 23:06:55 +00:00
|
|
|
# "<!-- ... -->" on a single line. The match is greedy, so it spans from the
# first "<!--" to the last "-->" on that line.
html_comment_regex = re.compile(r"<!--.*-->", flags=re.A)

# Any character that is neither an ASCII word character nor a space.
title_regex = re.compile(r"[^\w ]", flags=re.A)

# An old.reddit.com comments URL delimited by a quote, angle bracket or space
# on both sides; group 1 is the URL itself.
# Raw string: \/, \w, \. and \? are regex escapes, not Python string escapes.
controversial_regex = re.compile(r'["> ](https:\/\/old\.reddit\.com/r/\w{3,20}\/comments\/[\w\-.#&/=\?@%+]{5,250})["< ]', flags=re.A)
|
2022-06-24 14:30:59 +00:00
|
|
|
|
2023-06-30 18:29:53 +00:00
|
|
|
# Inline markup patterns; all are suppressed inside code/links by
# NOT_IN_CODE_OR_LINKS. Raw strings keep \|, \s, \( and \/ from being invalid
# Python string escapes.

# ||spoiler text|| - group 1 is the (lazily matched) inner text.
spoiler_regex = re.compile(r'\|\|(.+?)\|\|' + NOT_IN_CODE_OR_LINKS, flags=re.A)

# r/name or u/name (either case, optional leading "/") at the start of the
# text or after whitespace, "<p>" or "(". Group 2 is the "r/name" part and
# group 3 the single prefix letter.
reddit_regex = re.compile(r'(^|\s|<p>|\()\/?(([ruRU])\/(\w|-){3,25})' + NOT_IN_CODE_OR_LINKS, flags=re.A)

# h/name or H/name with the same delimiters; group 2 is the "h/name" part.
sub_regex = re.compile(r'(^|\s|<p>|\()\/?([hH]\/(\w|-){3,25})' + NOT_IN_CODE_OR_LINKS, flags=re.A)

# ~text~ or ~~text~~ at the start of the text or after whitespace, ">" or '"'.
# Group 1 is the leading delimiter, group 2 the struck text.
strikethrough_regex = re.compile(r'(^|\s|>|")~{1,2}([^~]+)~{1,2}' + NOT_IN_CODE_OR_LINKS, flags=re.A)
|
2022-06-24 14:30:59 +00:00
|
|
|
|
2023-04-23 13:22:41 +00:00
|
|
|
# "/mute [@]username minutes" (case-insensitive); group 1 is the username,
# group 2 the number.
mute_regex = re.compile(r"\/mute @?([\w\-]{3,30}) ([0-9]+)", flags=re.A|re.I)

# The emoji patterns were f-strings with nothing to interpolate; they are now
# plain raw strings, so the quantifier braces no longer need doubling and the
# backslash escapes are no longer invalid Python string escapes.

# A <p> that contains nothing but one or more :emoji: codes and whitespace.
emoji_regex = re.compile(r"<p>\s*(:[!#@\w\-]{1,36}:\s*)+<\/p>", flags=re.A)

# A single :emoji: code not preceded by '"' and not followed by text that runs
# into </code>, </pre> or a backtick. Group 1 is the code between the colons.
emoji_regex2 = re.compile(r'(?<!"):([!#@\w\-]{1,36}?):(?!([^<]*<\/(code|pre)>|[^`]*`))', flags=re.A)

# A single :emoji: code not preceded by '"'; no code-block check and no "#"
# modifier allowed. Group 1 is the code between the colons.
emoji_regex3 = re.compile(r'(?<!"):([!@\w\-]{1,35}?):', flags=re.A)

# <a href="http(s)://...">text</a>; group 1 is the URL, group 2 the link text.
snappy_url_regex = re.compile(r'<a href="(https?:\/\/.+?)".*?>(.+?)<\/a>', flags=re.A)

# <lite-youtube videoid="..." params="autoplay=1...; group 1 is the video id.
snappy_youtube_regex = re.compile(r'<lite-youtube videoid="(.+?)" params="autoplay=1', flags=re.A)
|
2022-06-24 14:30:59 +00:00
|
|
|
|
2022-10-19 10:39:03 +00:00
|
|
|
# Compiled form of EMAIL_REGEX_PATTERN (not defined in this file; presumably
# supplied by the star import from .config.const - verify), ASCII-only classes.
email_regex = re.compile(EMAIL_REGEX_PATTERN, flags=re.A)
|
2022-06-24 14:30:59 +00:00
|
|
|
|
2022-09-17 21:13:14 +00:00
|
|
|
# Matches "utm_<name>=<value>&", i.e. a utm parameter together with the "&" that follows it.
# NOTE(review): [0-z] is the ASCII range 0x30-0x7A, which covers digits and
# letters but also : ; < = > ? @ [ \ ] ^ _ ` - and it excludes "-", "." and "%".
# Confirm this is the intended character set for parameter names/values.
utm_regex = re.compile('utm_[0-z]+=[0-z_]+&', flags=re.A)
|
|
|
|
# Matches "?utm_<name>=<value>" or "&utm_<name>=<value>", i.e. a utm parameter
# together with the separator in front of it. Same [0-z] caveat as utm_regex.
utm_regex2 = re.compile('[?&]utm_[0-z]+=[0-z_]+', flags=re.A)
|
2022-06-24 14:30:59 +00:00
|
|
|
|
2022-11-03 22:59:18 +00:00
|
|
|
# Each pattern matches either a whole HTML tag ("<...>") or one of the words in
# the interpolated alternation. Matching tags as a unit lets the replacement
# callbacks in this file return them unchanged (they check for a leading "<").
# slur_single_words / profanity_single_words are not defined in this file;
# presumably they come from the star import of .config.const - verify.

# Case-insensitive variant.
slur_regex = re.compile(f"<[^>]*>|{slur_single_words}", flags=re.I|re.A)
|
|
|
|
# Case-sensitive variant that only matches fully upper-cased words.
# NOTE(review): .upper() is applied to the pattern text itself, so any lowercase
# regex escape inside slur_single_words (e.g. \b, \w, \s) would be turned into
# its upper-case counterpart with a different meaning - confirm it has none.
slur_regex_upper = re.compile(f"<[^>]*>|{slur_single_words.upper()}", flags=re.A)
|
|
|
|
# Case-insensitive variant for profanities.
profanity_regex = re.compile(f"<[^>]*>|{profanity_single_words}", flags=re.I|re.A)
|
|
|
|
# Case-sensitive, fully upper-cased variant for profanities (same .upper() caveat as above).
profanity_regex_upper = re.compile(f"<[^>]*>|{profanity_single_words.upper()}", flags=re.A)
|
|
|
|
|
2023-03-25 18:18:48 +00:00
|
|
|
# First-person words as standalone tokens (case-insensitive). In each pattern
# group 1 is the leading delimiter (start of text or whitespace), group 2 the
# word and group 3 the trailing delimiter (end of text or whitespace).
# Raw strings keep \s from being an invalid Python string escape.
torture_regex = re.compile(r'(^|\s)(i|me)($|\s)', flags=re.I|re.A)

torture_regex2 = re.compile(r"(^|\s)(i'm)($|\s)", flags=re.I|re.A)

torture_regex3 = re.compile(r"(^|\s)(my|mine)($|\s)", flags=re.I|re.A)

# Matches a sentence-ending period such as the one in ". "; does not match any
# period of "..." nor the one in "a.b". Group 1 is the period.
sentence_ending_regex = re.compile(r'(?<!\.)(\.)(?=$|\n|\s)', flags=re.I|re.A)
|
Add the "Misogynist" award to harass incels (#154)
Whazzup? This PR is the final solution to the incel problem. There's an old indian proverb that says: "never judge a man until you've walked two moons in his mocassins". In this case, it should be: "never judge a woman until you've walked 24 hrs in her high-heels".
The misogynist award is a comment-transforming award that "feminizes" comments. It does the following:
- makes text pink
- makes text lowercase
- removes "complicated" punctuation
- makes paragraphs into run-on sentences
- adds stereotypical girly remarks to the beginning or end of a paragraph.
For example:
INPUT
> What the fuck did you just fucking say about me, you little bitch? I'll have you know I graduated top of my class in the Navy Seals, and I've been involved in numerous secret raids on Al-Quaeda, and I have over 300 confirmed kills. I am trained in gorilla warfare and I'm the top sniper in the entire US armed forces. You are nothing to me but just another target. I will wipe you the fuck out with precision the likes of which has never been seen before on this Earth, mark my fucking words. You think you can get away with saying that shit to me over the Internet? Think again, fucker. As we speak I am contacting my secret network of spies across the USA and your IP is being traced right now so you better prepare for the storm, maggot. The storm that wipes out the pathetic little thing you call your life. You're fucking dead, kid. I can be anywhere, anytime, and I can kill you in over seven hundred ways, and that's just with my bare hands. Not only am I extensively trained in unarmed combat, but I have access to the entire arsenal of the United States Marine Corps and I will use it to its full extent to wipe your miserable ass off the face of the continent, you little shit. If only you could have known what unholy retribution your little "clever" comment was about to bring down upon you, maybe you would have held your fucking tongue. But you couldn't, you didn't, and now you're paying the price, you goddamn idiot. I will shit fury all over you and you will drown in it. You're fucking dead, kiddo.
OUTPUT
> im literally screaming, what the fuck did you just fucking say about me, you little bitch? ill have you know i graduated top of my class in the navy seals, and ive been involved in numerous secret raids on al-quaeda, and i have over 300 confirmed kills, i am trained in gorilla warfare and im the top sniper in the entire us armed forces, you are nothing to me but just another target, i will wipe you the fuck out with precision the likes of which has never been seen before on this earth, mark my fucking words, you think you can get away with saying that shit to me over the internet? think again, fucker, as we speak i am contacting my secret network of spies across the usa and your ip is being traced right now so you better prepare for the storm, maggot, the storm that wipes out the pathetic little thing you call your life, youre fucking dead, kid, i can be anywhere, anytime, and i can kill you in over seven hundred ways, and thats just with my bare hands, not only am i extensively trained in unarmed combat, but i have access to the entire arsenal of the united states marine corps and i will use it to its full extent to wipe your miserable ass off the face of the continent, you little shit, if only you could have known what unholy retribution your little clever comment was about to bring down upon you, maybe you would have held your fucking tongue, but you couldnt, you didnt, and now youre paying the price, you goddamn idiot, i will shit fury all over you and you will drown in it, youre fucking dead, kiddo
It also sets the user's pfp to a random white woman. Well, pseudorandom - it's based off of the user's id, so each user will only ever have one pfp assigned to them, which I think is nifty.
Finally, it changes the name of the user to a girly name.
There is one small problem with the PR, which is simply that I manually added a badge for testing purposes. If you like this PR, I will submit the badge through the proper channels and fix it.
![image](/attachments/641c7276-ffe4-4e69-b3e9-aec9f4f94191)
Co-authored-by: Chuck Sneed <sneed@formerlychucks.net>
Reviewed-on: https://fsdfsd.net/rDrama/rDrama/pulls/154
Co-authored-by: HeyMoon <heymoon@noreply.fsdfsd.net>
Co-committed-by: HeyMoon <heymoon@noreply.fsdfsd.net>
2023-06-21 12:36:07 +00:00
|
|
|
# Straight double or single quote characters; group 1 is the quote.
normal_punctuation_regex = re.compile('(\"|\')', flags=re.I|re.A)

# A run of two or more commas.
more_than_one_comma_regex = re.compile(r'\,\,+', flags=re.I|re.A)

# The word patterns below share one shape: the word must start at the beginning
# of the string or right after whitespace, and must be followed by the end of
# the string, a newline, whitespace or one of . ? ! ,
# Raw strings keep \s and \? from being invalid Python string escapes.

# Matches the various superlatives as standalone words.
superlative_regex = re.compile(r'(?<=^|(?<=\s))(everyone|everybody|nobody|all|none|every|any|no one|anything)(?=$|\n|\s|[.?!,])', flags=re.I|re.A)

# Matches "into", except when it is already prefixed by "totally ".
totally_regex = re.compile(r'(?<=^|(?<=\s))(?<!totally )(into)(?=$|\n|\s|[.?!,])', flags=re.I|re.A)

# Matches a standalone greeting word.
greeting_regex = re.compile(r'(?<=^|(?<=\s))(hello|hi|hey|hecko)(?=$|\n|\s|[.?!,])', flags=re.I|re.A)

# Matches "just" / "only", except when already prefixed by "like ".
like_before_regex = re.compile(r'(?<=^|(?<=\s))(?<!like )(just|only)(?=$|\n|\s|[.?!,])', flags=re.I|re.A)

# Matches "i mean", except when it is already followed by " like".
like_after_regex = re.compile(r'(?<=^|(?<=\s))(i mean)(?! like)(?=$|\n|\s|[.?!,])', flags=re.I|re.A)

# Matches a single "!" or "?" that is not touching another "!" or "?" and is
# followed by whitespace or the end of the string (so not one directly in
# front of a letter). Group 1 is the punctuation mark.
single_repeatable_punctuation = re.compile(r'(?<!!|\?)(!|\?)(?!!|\?)(?=\s|$)', flags=re.I|re.A)
|
2023-06-26 14:38:48 +00:00
|
|
|
# The word patterns below share one shape: the word must start at the beginning
# of the string or right after whitespace, and must be followed by the end of
# the string, a newline, whitespace or one of . ? ! ,
# Raw strings keep \s from being an invalid Python string escape.

# Matches "redpill" / "redpilled", to be turned into "goodpilled" (extremely
# jankpilled but it's whatever). Group 2 contains "ed" if it is present.
redpilled_regex = re.compile(r'(?<=^|(?<=\s))(redpill(ed)*)(?=$|\n|\s|[.?!,])', flags=re.I|re.A)

# Matches "based and Xpilled", to be turned into "comfy X vibes". "X" is in
# group 2. The "ed" is not optional here; it is always required.
based_and_x_pilled_regex = re.compile(r'(?<=^|(?<=\s))(based and ([a-zA-Z]*)pilled)(?=$|\n|\s|[.?!,])', flags=re.I|re.A)

# Matches "based", to be turned into "comfy".
based_regex = re.compile(r'(?<=^|(?<=\s))(based)(?=$|\n|\s|[.?!,])', flags=re.I|re.A)

# Matches "Xpill" / "Xpilled", to be turned into "X vibes". "X" is in group 2,
# "ed" (if present) in group 3.
x_pilled_regex = re.compile(r'(?<=^|(?<=\s))(([a-zA-Z]+)pill(ed)?)(?=$|\n|\s|[.?!,])', flags=re.I|re.A)

# Matches "Xmax", "Xmaxx", "Xmaxxx", ..., to be turned into "normalize good Xs".
# "X" is in group 2, an "s" directly after X (if present) in group 3.
xmax_regex = re.compile(r'(?<=^|(?<=\s))(([a-zA-Z]+?)(s)?max+)(?=$|\n|\s|[.?!,])', flags=re.I|re.A)

# Same as above, except "Xmaxxed" this time (because "normalized" isn't
# "normalize" + "ed").
xmaxed_regex = re.compile(r'(?<=^|(?<=\s))(([a-zA-Z]+?)(s)?max+ed)(?=$|\n|\s|[.?!,])', flags=re.I|re.A)

# Same as above, except "Xmaxxing" this time.
xmaxing_regex = re.compile(r'(?<=^|(?<=\s))(([a-zA-Z]+?)(s)?max+ing)(?=$|\n|\s|[.?!,])', flags=re.I|re.A)

# A run of ">" characters at the very start of the string; group 1 is the run.
initial_part_regex = re.compile(r'(?<=^)(>+)', flags=re.I|re.A)
|
Add the "Misogynist" award to harass incels (#154)
Whazzup? This PR is the final solution to the incel problem. There's an old indian proverb that says: "never judge a man until you've walked two moons in his mocassins". In this case, it should be: "never judge a woman until you've walked 24 hrs in her high-heels".
The misogynist award is a comment-transforming award that "feminizes" comments. It does the following:
- makes text pink
- makes text lowercase
- removes "complicated" punctuation
- makes paragraphs into run-on sentences
- adds stereotypical girly remarks to the beginning or end of a paragraph.
For example:
INPUT
> What the fuck did you just fucking say about me, you little bitch? I'll have you know I graduated top of my class in the Navy Seals, and I've been involved in numerous secret raids on Al-Quaeda, and I have over 300 confirmed kills. I am trained in gorilla warfare and I'm the top sniper in the entire US armed forces. You are nothing to me but just another target. I will wipe you the fuck out with precision the likes of which has never been seen before on this Earth, mark my fucking words. You think you can get away with saying that shit to me over the Internet? Think again, fucker. As we speak I am contacting my secret network of spies across the USA and your IP is being traced right now so you better prepare for the storm, maggot. The storm that wipes out the pathetic little thing you call your life. You're fucking dead, kid. I can be anywhere, anytime, and I can kill you in over seven hundred ways, and that's just with my bare hands. Not only am I extensively trained in unarmed combat, but I have access to the entire arsenal of the United States Marine Corps and I will use it to its full extent to wipe your miserable ass off the face of the continent, you little shit. If only you could have known what unholy retribution your little "clever" comment was about to bring down upon you, maybe you would have held your fucking tongue. But you couldn't, you didn't, and now you're paying the price, you goddamn idiot. I will shit fury all over you and you will drown in it. You're fucking dead, kiddo.
OUTPUT
> im literally screaming, what the fuck did you just fucking say about me, you little bitch? ill have you know i graduated top of my class in the navy seals, and ive been involved in numerous secret raids on al-quaeda, and i have over 300 confirmed kills, i am trained in gorilla warfare and im the top sniper in the entire us armed forces, you are nothing to me but just another target, i will wipe you the fuck out with precision the likes of which has never been seen before on this earth, mark my fucking words, you think you can get away with saying that shit to me over the internet? think again, fucker, as we speak i am contacting my secret network of spies across the usa and your ip is being traced right now so you better prepare for the storm, maggot, the storm that wipes out the pathetic little thing you call your life, youre fucking dead, kid, i can be anywhere, anytime, and i can kill you in over seven hundred ways, and thats just with my bare hands, not only am i extensively trained in unarmed combat, but i have access to the entire arsenal of the united states marine corps and i will use it to its full extent to wipe your miserable ass off the face of the continent, you little shit, if only you could have known what unholy retribution your little clever comment was about to bring down upon you, maybe you would have held your fucking tongue, but you couldnt, you didnt, and now youre paying the price, you goddamn idiot, i will shit fury all over you and you will drown in it, youre fucking dead, kiddo
It also sets the user's pfp to a random white woman. Well, pseudorandom - it's based off of the user's id, so each user will only ever have one pfp assigned to them, which I think is nifty.
Finally, it changes the name of the user to a girly name.
There is one small problem with the PR, which is simply that I manually added a badge for testing purposes. If you like this PR, I will submit the badge through the proper channels and fix it.
![image](/attachments/641c7276-ffe4-4e69-b3e9-aec9f4f94191)
Co-authored-by: Chuck Sneed <sneed@formerlychucks.net>
Reviewed-on: https://fsdfsd.net/rDrama/rDrama/pulls/154
Co-authored-by: HeyMoon <heymoon@noreply.fsdfsd.net>
Co-committed-by: HeyMoon <heymoon@noreply.fsdfsd.net>
2023-06-21 12:36:07 +00:00
|
|
|
|
2023-06-26 14:38:48 +00:00
|
|
|
# Matches an article ("the"/"a", optionally followed by " only") or a form of
# "to be" with its optional trailing determiners, at the start of the string or
# after whitespace, but only if it is followed by whitespace and is not already
# followed by " fucking". https://regex101.com/r/yxuYsQ/2
# Raw strings keep \s, \? and \! from being invalid Python string escapes.
the_fucking_regex = re.compile(r'(?<=^|(?<=\s))((?:the|a)( (?:only))?|((that )?(?:is|are|was|were|will be|would be)( (?:your|her|his|their|no|a|not|to|too|so|this|the|our|what))?( (a|the))?)|is)(?=\s)(?! fucking)', flags=re.I|re.A)

# Matches a single question mark (not touching another "?" or "!", and followed
# by whitespace or the end of the string), but only if it isn't preceded by
# ", bitch".
bitch_question_mark_regex = re.compile(r'(?<!\?|\!)(?<!, bitch)(\?)(?!!|\?)(?=\s|$)', flags=re.I|re.A)

# Matches a single exclamation point under the same rules, but only if it isn't
# preceded by ", motherfucker".
exclamation_point_regex = re.compile(r'(?<!!|\?)(?<!, motherfucker)(!)(?!!|\?)(?=\s|$)', flags=re.I|re.A)
|
|
|
|
|
2023-05-01 17:08:14 +00:00
|
|
|
# The f-strings below are raw (rf'...') so that regex escapes such as \[ or \/
# are not treated as invalid Python string escapes; interpolation is unchanged.
# `hosts`, VIDEO_FORMATS and AUDIO_FORMATS are not defined in this file
# (presumably provided by the star import of .config.const - verify).

# Markdown image with empty alt text, "![](target)", whose target neither starts
# with "https://<one of hosts>/" nor with "/". Group 1 is the target.
image_check_regex = re.compile(rf'!\[\]\(((?!(https:\/\/({hosts})\/|\/)).*?)\)', flags=re.A)

# Alternation of the video file extensions.
video_regex_extensions = '|'.join(VIDEO_FORMATS)

# A bare https URL on one of `hosts` ending in a video extension, not preceded
# by '"' and not inside code/links. Group 1 is the whole URL.
video_sub_regex = re.compile(rf'(?<!")(https:\/\/({hosts})\/[\w:~,()\-.#&\/=?@%;+]*?\.({video_regex_extensions}))' + NOT_IN_CODE_OR_LINKS, flags=re.A)

# Alternation of the audio file extensions.
audio_regex_extensions = '|'.join(AUDIO_FORMATS)

# Same as video_sub_regex, for audio extensions.
audio_sub_regex = re.compile(rf'(?<!")(https:\/\/({hosts})\/[\w:~,()\-.#&\/=?@%;+]*?\.({audio_regex_extensions}))' + NOT_IN_CODE_OR_LINKS, flags=re.A)
|
2022-10-30 14:55:43 +00:00
|
|
|
|
|
|
|
# Alternation of the image file extensions. IMAGE_FORMATS is not defined in
# this file (presumably provided by the star import of .config.const - verify).
image_regex_extensions = '|'.join(IMAGE_FORMATS)

# A bare https URL (not preceded by '"') that ends in ".<ext>" or
# "?format=<ext>", optionally followed by a query string, and is followed by
# whitespace or the end of the text. Group 1 is the whole URL.
# Raw f-string: regex escapes stay literal, the quantifier braces stay doubled.
image_sub_regex = re.compile(rf'(?<!")(https:\/\/[\w\-.#&/=\?@%;+,:]{{5,250}}(\.|\?format=)({image_regex_extensions})((\?|&)[\w\-.#&/=\?@%;+,:]*)?)(?=$|\s)', flags=re.I|re.A)

# Same alternation with "|gif" removed.
# NOTE(review): this only removes "gif" when it is not the first entry of
# IMAGE_FORMATS (the leading "|" must be present) - confirm the ordering.
image_regex_extensions_no_gif = image_regex_extensions.replace('|gif', '')

# i.imgur.com image URL with a non-gif extension; group 1 is the URL without
# the extension, group 2 the extension.
imgur_regex = re.compile(rf'(https:\/\/i\.imgur\.com\/[a-z0-9]+)\.({image_regex_extensions_no_gif})', flags=re.I|re.A)
|
2022-06-24 14:30:59 +00:00
|
|
|
|
|
|
|
# Raw strings keep \/, \. and \? from being invalid Python string escapes.

# media.giphy.com ".../giphy.gif" URL; group 1 is the URL without ".gif".
giphy_regex = re.compile(r'(https:\/\/media\.giphy\.com\/media\/[a-z0-9]+\/giphy)\.gif', flags=re.I|re.A)

# A youtube.com watch URL that appears in the text of a <p> before any other
# tag. Group 1 is "<p>" plus the text in front of the URL, group 2 the URL.
youtube_regex = re.compile(r'(<p>[^<]*)(https:\/\/youtube\.com\/watch\?[\w\-.#&/=?@%+]{7,})', flags=re.I|re.A)

# 5-20 word characters or hyphens.
yt_id_regex = re.compile(r'[\w\-]{5,20}', flags=re.A)

# rumble.com embed URL; group 1 is the embed id, group 2 the "pub" value.
rumble_regex = re.compile(r'https://rumble\.com/embed/([a-zA-Z0-9]*)/\?pub=([a-zA-Z0-9]*)', flags=re.I|re.A)

# youtube.com watch URL; group 1 is the query string after "watch?".
bare_youtube_regex = re.compile(r'https:\/\/youtube\.com\/watch\?([\w\-.#&/=?@%+]{7,})', flags=re.I|re.A)

# twitch.tv URL with optional scheme and "www."; group 3 is everything after
# "twitch.tv/". The dot is escaped so that only a literal "twitch.tv" matches
# (an unescaped "." would also accept e.g. "twitchxtv/").
twitch_regex = re.compile(r'(https:\/\/)?(www\.)?twitch\.tv/(.*)', flags=re.I|re.A)
|
|
|
|
|
2023-06-30 18:29:53 +00:00
|
|
|
# Markdown link "[text](target)" whose target starts with neither "http" nor
# "/", outside code/links. Group 1 is "[text](", group 2 is "target)".
# Raw string keeps \[, \] and \( from being invalid Python string escapes.
link_fix_regex = re.compile(r"(\[.*?\]\()(?!http|\/)(.*?\))" + NOT_IN_CODE_OR_LINKS, flags=re.A)
|
2022-06-24 14:30:59 +00:00
|
|
|
|
2023-07-12 01:25:18 +00:00
|
|
|
# CSS "url(" followed by optional whitespace and an optional quote
# (case-insensitive). Group 1 is greedy and captures the rest of the line,
# including any closing quote or parenthesis.
# Raw triple-quoted string: same pattern text, without invalid string escapes.
css_url_regex = re.compile(r"""url\(\s*['"]?(.*)['"]?""", flags=re.I|re.A)

# A single newline between two non-newline characters; groups 1 and 2 are the
# characters on either side.
linefeeds_regex = re.compile("([^\n])\n([^\n])", flags=re.A)

# ">text" at the start of the text or after a newline, where the character
# after ">" is neither a space nor another ">". Group 1 is the leading
# delimiter, group 2 the rest of the line.
greentext_regex = re.compile("(\n|^)>([^ >][^\n]*)", flags=re.A)
|
|
|
|
|
2023-05-12 19:12:02 +00:00
|
|
|
# One or more letters, digits, hyphens or dots (case-insensitive).
allowed_domain_regex = re.compile(r"[a-z0-9\-.]+", flags=re.I|re.A)

# "https://old.reddit.com/r/" or ".../u/" at the start of the text or after
# ">" / '"'. Group 1 is the leading delimiter, group 2 is "r" or "u".
# The dots of the host name are escaped so they only match a literal "."
# (previously "oldxreddit.com" etc. were accepted as well).
reddit_to_vreddit_regex = re.compile(r'(^|>|")https:\/\/old\.reddit\.com\/(r|u)\/', flags=re.A)

# "https://twitter.com/" at the start of the text or after ">" / '"', unless
# the path starts with "i/". The host-name dot is escaped for the same reason.
twitter_to_nitter_regex = re.compile(r'(^|>|")https:\/\/twitter\.com\/(?!i\/)', flags=re.A)

# A reddit URL on a non-"old" host (bare domain, two-letter language subdomain
# with optional "-xx" region, a fixed list of subdomains, or a known mirror)
# that points at /r/, /u/ or /comments/. Group 1 is the leading delimiter,
# group 2 the host, group 3 the path kind.
# The language code uses [A-Za-z]; the former [A-z] range also matched the
# punctuation characters [ \ ] ^ _ ` that sit between "Z" and "a" in ASCII.
reddit_domain_regex = re.compile(
    r"(^|\s|\()https?:\/\/"
    r"(reddit\.com"
    r"|(?:(?:[A-Za-z]{2})(?:-[A-Za-z]{2})?|beta|i|m|pay|ssl|www|new|alpha)\.reddit\.com"
    r"|libredd\.it|reddit\.lol)"
    r"\/(r|u|comments)\/",
    flags=re.A,
)
|
2022-06-24 14:30:59 +00:00
|
|
|
|
2022-11-07 00:19:13 +00:00
|
|
|
# Six lowercase hexadecimal digits.
color_regex = re.compile(r"[a-f0-9]{6}", flags=re.A)

# Lazy match on the .*?, so group 1 stops at the first "</p>" (optionally
# followed by "</li></ul>"); only matches if there is trailing stuff, which
# must begin with optional whitespace and another "<p>" and ends up in group 2.
# Specifically matches Snappy's way of formatting, so it might break some
# other comments.
showmore_regex = re.compile(r"^(.*?</p>(?:</li></ul>)?)(\s*<p>.*)", flags=re.A|re.DOTALL)

# Either a double-quoted phrase (group 1, without the quotes) or a bare run of
# non-whitespace characters (group 2).
search_token_regex = re.compile(r'"([^"]*)"|(\S+)', flags=re.A)

# "ref: refs/..." line; group 1 is the ref path.
git_regex = re.compile(r"ref: (refs/.+)", flags=re.A)

# Two or three slash-separated words of 1-7 letters each (case-insensitive);
# group 1 is the first word, group 2 the optional "/third" part.
# Raw string keeps \/ from being an invalid Python string escape.
pronouns_regex = re.compile(r"([a-z]{1,7})\/[a-z]{1,7}(\/[a-z]{1,7})?", flags=re.A|re.I)

# <title>...</title> on one line (case-insensitive); group 1 is the title text,
# limited to 200 characters.
html_title_regex = re.compile(r"<title>(.{1,200})</title>", flags=re.I)
|
2022-11-11 09:24:54 +00:00
|
|
|
|
2023-01-23 06:22:01 +00:00
|
|
|
def sub_matcher(match:re.Match, upper=False, replace_with:Union[dict[str, str], dict[str, List[str]]]=SLURS_FOR_REPLACING):
    """Return the replacement text for one regex match.

    Intended as the callback of ``Pattern.sub``.

    match: the match object; only the full match (group 0) is used.
    upper: when true, the replacement is upper-cased, unless it contains
        "<img".
    replace_with: mapping from the lower-cased matched text to its replacement.

    Returns the matched text unchanged when it starts with "<" (the censoring
    patterns in this module also match whole HTML tags so that they can be
    passed through here), otherwise the looked-up replacement.

    Raises KeyError if the lower-cased match is not a key of ``replace_with``.
    """
    matched_text = match.group(0)

    # HTML tags are passed through untouched.
    if matched_text.startswith('<'):
        return matched_text

    replacement = replace_with[matched_text.lower()]

    # NOTE(review): the annotation admits list values, but .upper() below only
    # exists on str - confirm callers never combine upper=True with lists.
    keep_as_is = not upper or "<img" in replacement
    return replacement if keep_as_is else replacement.upper()
|
2022-06-24 14:30:59 +00:00
|
|
|
|
2023-01-23 06:22:01 +00:00
|
|
|
def sub_matcher_upper(match, replace_with:Union[dict[str, str], dict[str, List[str]]]=SLURS_FOR_REPLACING):
    """Upper-casing variant of ``sub_matcher``: same lookup, with ``upper`` forced to True."""
    return sub_matcher(match, True, replace_with)
|
2022-06-24 14:30:59 +00:00
|
|
|
|
2022-11-03 22:59:18 +00:00
|
|
|
|
|
|
|
# TODO: make censoring a bit better
|
|
|
|
def sub_matcher_slurs(match, upper=False):
    """``sub_matcher`` bound to the SLURS_FOR_REPLACING table; ``upper`` is passed through."""
    return sub_matcher(match, upper=upper, replace_with=SLURS_FOR_REPLACING)
|
2022-11-03 22:59:18 +00:00
|
|
|
|
|
|
|
def sub_matcher_slurs_upper(match):
    """Upper-casing variant of ``sub_matcher_slurs``."""
    return sub_matcher_slurs(match, True)
|
|
|
|
|
|
|
|
def sub_matcher_profanities(match, upper=False):
    """``sub_matcher`` bound to the PROFANITIES_FOR_REPLACING table; ``upper`` is passed through."""
    return sub_matcher(match, upper=upper, replace_with=PROFANITIES_FOR_REPLACING)
|
2022-11-03 22:59:18 +00:00
|
|
|
|
|
|
|
def sub_matcher_profanities_upper(match):
    """Upper-casing variant of ``sub_matcher_profanities``."""
    return sub_matcher_profanities(match, True)
|
|
|
|
|
2022-11-03 23:01:12 +00:00
|
|
|
def censor_slurs(body:Optional[str], logged_user):
    """Apply the slur (and, on rDrama, profanity) replacements to ``body``.

    body: text to censor; a falsy value yields "".
    logged_user: a falsy value, the string 'chat', or an object exposing the
        ``slurreplacer`` / ``profanityreplacer`` attributes.

    Slurs are replaced when there is no user, the user is 'chat', or the user
    has ``slurreplacer`` set. Profanities are replaced under the equivalent
    condition on ``profanityreplacer``, and only when SITE_NAME is 'rDrama'.

    Returns the (possibly rewritten) text.
    """
    if not body:
        return ""

    def _censor(text:str, pattern:re.Pattern, pattern_upper:re.Pattern, repl, repl_upper):
        # The all-caps pass runs first so upper-cased words keep upper-cased
        # replacements; the case-insensitive pass then handles the rest.
        return pattern.sub(repl, pattern_upper.sub(repl_upper, text))

    # No user object to consult: censoring is always applied.
    no_preferences = not logged_user or logged_user == 'chat'

    if no_preferences or logged_user.slurreplacer:
        body = _censor(body, slur_regex, slur_regex_upper, sub_matcher_slurs, sub_matcher_slurs_upper)

    if SITE_NAME == 'rDrama' and (no_preferences or logged_user.profanityreplacer):
        body = _censor(body, profanity_regex, profanity_regex_upper, sub_matcher_profanities, sub_matcher_profanities_upper)

    return body
|
|
|
|
|
2022-07-11 12:14:18 +00:00
|
|
|
# Pool of possible results for each "#command" recognised by command_regex.
# Keys are the lower-case command names; values are sequences that
# command_regex_matcher draws one element from.
commands = {
    "fortune": FORTUNE_REPLIES,
    "factcheck": FACTCHECK_REPLIES,
    "8ball": EIGHTBALL_REPLIES,
    "roll": range(1, 10000),  # integers 1..9999
}
|
|
|
|
|
2023-01-23 07:38:16 +00:00
|
|
|
# "#fortune", "#factcheck", "#8ball" or "#roll" (case-insensitive) at the start
# of the text or after whitespace. Group 1 is the leading delimiter, group 2
# the command name as written.
# Raw string keeps \s from being an invalid Python string escape.
command_regex = re.compile(r"(\s|^)#(fortune|factcheck|8ball|roll)", flags=re.A|re.I)
|
2022-07-11 12:14:18 +00:00
|
|
|
|
|
|
|
def command_regex_matcher(match, upper=False):
    """Return the replacement text for one ``command_regex`` match.

    match: a match of ``command_regex``; group 1 is the leading delimiter and
        group 2 the command name as the user typed it.
    upper: unused; kept so the signature stays compatible with existing callers.

    Returns group 1 followed by a random element of ``commands[<name>]``. For
    the "roll" command the number is wrapped in a bold tag with a random RGB
    colour.
    """
    # command_regex is case-insensitive, so normalise once and use the same
    # value for both the lookup and the "roll" check. Previously the check
    # compared the raw text, so "#ROLL" was rolled but not formatted.
    command = match.group(2).lower()

    result = str(choice(commands[command]))

    if command == 'roll':
        color = tuple(choices(range(256), k=3))
        result = f'<b style="color:rgb{color}">Your roll: {result}</b>'

    return match.group(1) + result
|
2022-12-30 16:10:29 +00:00
|
|
|
|
2023-02-07 02:34:11 +00:00
|
|
|
# "/post/<digits>"; group 1 is the whole path fragment.
reason_regex_post = re.compile(r'(/post/[0-9]+)', flags=re.A)

# "/comment/<digits>"; group 1 is the whole path fragment.
reason_regex_comment = re.compile(r'(/comment/[0-9]+)', flags=re.A)

# "name#1234": 2-32 arbitrary characters followed by "#" and exactly four
# digits (no fifth digit), at the start of the text or after whitespace / ">".
# Raw string keeps \s from being an invalid Python string escape.
discord_username_regex = re.compile(r"(\s|^|>).{2,32}#[0-9]{4}(?=[^0-9]|$)", flags=re.A)

# "<digits>. " at the start of the text or after whitespace; group 1 is the
# number together with its leading delimiter.
numbered_list_regex = re.compile(r'((\s|^)[0-9]+)\. ', flags=re.A)

# Trailing "/<digits>" at the end of the string.
comment_link_regex = re.compile(r"/[0-9]+$", flags=re.A)
|
2023-02-19 19:31:26 +00:00
|
|
|
|
2023-05-22 14:57:58 +00:00
|
|
|
# URLs of media hosted on this site. SITE is interpolated through re.escape so
# that the dots (and any other special characters) of the host name only match
# themselves; unescaped, "example.net" would also accept "examplexnet".
# Assumes SITE is a plain host name rather than a regex fragment - verify.
# Raw f-strings keep \. and \/ from being invalid Python string escapes.
# SITE and video_regex_extensions are defined outside this block.

# https://[i.]<SITE>/[chat_]images/<11-17 digits>[r].webp
image_link_regex = re.compile(rf"https://(i\.)?{re.escape(SITE)}\/(chat_)?images\/[0-9]{{11,17}}r?\.webp", flags=re.A)

# https://[videos.]<SITE>/[videos/]<11-17 digits>.<video extension>
video_link_regex = re.compile(rf"https://(videos\.)?{re.escape(SITE)}\/(videos\/)?[0-9]{{11,17}}\.({video_regex_extensions})", flags=re.A)
|