2022-06-24 14:30:59 +00:00
|
|
|
import re
|
2022-07-08 16:21:13 +00:00
|
|
|
from .const import *
|
2022-07-14 14:43:05 +00:00
|
|
|
from random import choice, choices
|
2022-06-24 14:30:59 +00:00
|
|
|
|
2022-08-17 19:41:09 +00:00
|
|
|
# Body of a regex character class (without the surrounding brackets) listing
# the characters allowed in usernames. Raw string so that `\-` is passed to
# the regex engine verbatim instead of being an invalid string escape.
valid_username_chars = r'a-zA-Z0-9_\-'

# A whole string consisting of 3 to 25 allowed username characters.
valid_username_regex = re.compile(r"^[a-zA-Z0-9_\-]{3,25}$", flags=re.A)

# "@name" (1-30 allowed characters) at the start of the text or right after
# whitespace or ">". The trailing negative lookahead rejects a match when a
# closing </code>, </pre> or </a> tag follows before any other "<".
# Group 2 is the mentioned name.
mention_regex = re.compile(r'(^|\s|>)@(([a-zA-Z0-9_\-]){1,30})(?![^<]*<\/(code|pre|a)>)', flags=re.A)

# A single line of 8 to 100 characters ("." does not match a newline).
valid_password_regex = re.compile("^.{8,100}$", flags=re.A)
|
|
|
|
|
|
|
|
# Matches ">" followed by a character that is not "<", whitespace or "+",
# or such a character immediately followed by "<".
# NOTE(review): the "+" sits inside the character class and is therefore a
# literal plus sign, not a quantifier - confirm that this is intended.
marseyaward_body_regex = re.compile(r">[^<\s+]|[^>\s+]<", flags=re.A)

# One or more <img ...> tags, each optionally preceded by spaces.
marseyaward_title_regex = re.compile("( *<img[^>]+>)+", flags=re.A)

# "marsey" followed by 1-24 lowercase letters or digits.
marsey_regex = re.compile("marsey[a-z0-9]{1,24}", flags=re.A)

# 1-200 characters drawn from lowercase letters, digits, ":" and space.
tags_regex = re.compile("[a-z0-9: ]{1,200}", flags=re.A)

# 1-50 characters drawn from letters, digits, "-", "(", ")", space, "," and "_".
hat_regex = re.compile(r"[a-zA-Z0-9\-() ,_]{1,50}", flags=re.A)

# 1-300 characters excluding "<", ">", "&", newline and tab.
description_regex = re.compile("[^<>&\n\t]{1,300}", flags=re.A)
|
2022-09-02 23:58:55 +00:00
|
|
|
|
2022-06-24 14:30:59 +00:00
|
|
|
|
2022-08-11 13:34:56 +00:00
|
|
|
# A whole string of 3-25 letters, digits, "_" or "-".
valid_sub_regex = re.compile(r"^[a-zA-Z0-9_\-]{3,25}$", flags=re.A)

# "key:value" pairs: group 1 is the word before the colon, group 2 the run
# of non-whitespace after it.
query_regex = re.compile(r"(\w+):(\S+)", flags=re.A)

# "$$text$$" with optional surrounding whitespace; group 1 is the text,
# which may not contain "$" or a newline.
poll_regex = re.compile(r"\s*\$\$([^\$\n]+)\$\$\s*", flags=re.A)

# Same shape as poll_regex but delimited by "$$$".
bet_regex = re.compile(r"\s*\$\$\$([^\$\n]+)\$\$\$\s*", flags=re.A)

# "&&text&&" with optional surrounding whitespace; group 1 is the text.
# NOTE(review): the captured text excludes "$" (not "&") and newlines, the
# same class poll_regex uses - confirm that "&" inside the text is intended
# to be allowed.
choice_regex = re.compile(r"\s*&&([^\$\n]+)&&\s*", flags=re.A)

# Any single character that is neither an ASCII word character nor a space.
title_regex = re.compile(r"[^\w ]", flags=re.A)

# "based and X-pilled" / "based and X pilled" (case-insensitive); group 1 is
# X, matched lazily and limited to 1-20 characters.
based_regex = re.compile("based and (.{1,20}?)(-| )pilled", flags=re.I|re.A)
|
|
|
|
|
|
|
|
# An old.reddit.com comments URL delimited by a quote, ">" / "<" or a space
# on either side; group 1 is the URL itself.
controversial_regex = re.compile(r'["> ](https:\/\/old\.reddit\.com/r/[a-zA-Z0-9_]{3,20}\/comments\/[\w\-.#&/=\?@%+]{5,250})["< ]', flags=re.A)

# Any http:// or https:// URL up to the next whitespace.
fishylinks_regex = re.compile(r"https?://\S+", flags=re.A)

# "||text||"; group 1 is the text (greedy, so it spans to the last "||" on
# the line).
spoiler_regex = re.compile(r'''\|\|(.+)\|\|''', flags=re.A)

# "r/name" or "u/name" (optionally with a leading "/") at the start of the
# text or after whitespace or "<p>". The negative lookahead rejects a match
# that is followed by a closing </code>, </pre> or </a> before any other "<".
# Group 2 is the "r/name" / "u/name" part.
reddit_regex = re.compile(r'(^|\s|<p>)\/?((r|u)\/(\w|-){3,25})(?![^<]*<\/(code|pre|a)>)', flags=re.A)

# Same shape as reddit_regex but for "h/name"; group 2 is "h/name".
sub_regex = re.compile(r'(^|\s|<p>)\/?(h\/(\w|-){3,25})(?![^<]*<\/(code|pre|a)>)', flags=re.A)

# "~text~" or "~~text~~" at the start of the text or after whitespace or
# ">"; group 2 is the text between the tildes.
strikethrough_regex = re.compile(r'(^|\s|>)~{1,2}([^~]+)~{1,2}', flags=re.A)

# "/mute @name number" (case-insensitive); group 1 is the name (3-30
# characters), group 2 the run of digits.
mute_regex = re.compile(r"\/mute @([a-z0-9_\-]{3,30}) ([0-9]+)", flags=re.A|re.I)
|
2022-06-24 14:30:59 +00:00
|
|
|
|
|
|
|
# The three patterns below are raw f-strings: the backslashes reach the regex
# engine verbatim (instead of being invalid string escapes), while
# {valid_username_chars} is still interpolated and "{{"/"}}" still produce
# literal braces for the quantifiers.

# A <p> paragraph that contains nothing but ":name:" tokens (each optionally
# prefixed by up to three of "!", "#", "@") separated by optional whitespace.
emoji_regex = re.compile(rf"<p>\s*(:[!#@]{{0,3}}[{valid_username_chars}]+:\s*)+<\/p>", flags=re.A)

# A ":token:" of 1-36 characters (username characters plus "!", "#", "@")
# that is not immediately preceded by a double quote; group 1 is the token.
emoji_regex2 = re.compile(rf'(?<!"):([!#@{valid_username_chars}]{{1,36}}?):', flags=re.A)

# Like emoji_regex2 but without "#" in the allowed set and limited to 1-35
# characters.
emoji_regex3 = re.compile(rf'(?<!"):([!@{valid_username_chars}]{{1,35}}?):', flags=re.A)
|
2022-06-24 14:30:59 +00:00
|
|
|
|
2022-06-24 14:45:19 +00:00
|
|
|
# An <a href="http(s)://..."> link; group 1 is the href URL, group 2 the
# link text (both matched lazily).
snappy_url_regex = re.compile(r'<a href="(https?:\/\/.+?)".*?>(.+?)<\/a>', flags=re.A)

# A <lite-youtube> element with autoplay params; group 1 is the video id.
snappy_youtube_regex = re.compile('<lite-youtube videoid="(.+?)" params="autoplay=1', flags=re.A)

# A permissive e-mail address shape (case-insensitive).
email_regex = re.compile(r'[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,}', flags=re.A|re.I)

# "utm_key=value&" - a utm parameter that is followed by another parameter.
# The key and value are ASCII word characters only. The previous class
# "[0-z]" spanned every code point from "0" to "z", which also includes
# ":;<=>?@[\]^`"; that let a match swallow the "<" of a following HTML tag
# (e.g. "?utm_a=b</a>" lost its "<").
utm_regex = re.compile(r'utm_\w+=\w+&', flags=re.A)

# "?utm_key=value" or "&utm_key=value" - a utm parameter together with its
# leading separator. Same character set as utm_regex.
utm_regex2 = re.compile(r'[?&]utm_\w+=\w+', flags=re.A)
|
2022-06-24 14:30:59 +00:00
|
|
|
|
2022-07-17 19:17:46 +00:00
|
|
|
# Both patterns match either a complete "<...>" tag or one of the
# alternatives in `single_words`. Matching whole tags as well lets a
# replacement callback recognise them (they start with "<") and return them
# untouched.

# Case-insensitive variant.
slur_regex = re.compile("<[^>]*>|" + f"{single_words}", flags=re.A | re.I)

# Case-sensitive variant built from the upper-cased word pattern.
# NOTE(review): .upper() is applied to the regex source text itself; if
# `single_words` ever contains escapes such as \b or \s they would become
# \B / \S - verify against the definition of `single_words`.
slur_regex_upper = re.compile("<[^>]*>|" + f"{single_words.upper()}", flags=re.A)
|
2022-06-24 14:30:59 +00:00
|
|
|
# "i " or "me " (case-insensitive) at the start of the text or after
# whitespace; group 1 is that leading boundary so a substitution can keep it.
torture_regex = re.compile(r'(^|\s)(i|me) ', flags=re.I|re.A)

# "i'm " (case-insensitive) at the start of the text or after whitespace;
# group 1 is the leading boundary.
torture_regex2 = re.compile(r"(^|\s)i'm ", flags=re.I|re.A)

# A string whose first non-whitespace character is ">".
torture_regex_exclude = re.compile(r'^\s*>', flags=re.A)
|
|
|
|
|
|
|
|
|
|
|
|
# The three patterns below are raw f-strings so that the regex escapes are
# passed through verbatim while {hosts} is still interpolated. `hosts` is
# used as a regex alternation of host names.

# Markdown image syntax "![](target)" whose target neither starts with "/"
# nor is an https URL on one of `hosts` (or a subdomain of one); group 1 is
# the target.
image_check_regex = re.compile(rf'!\[\]\(((?!(https:\/\/([a-z0-9-]+\.)*({hosts})\/|\/)).*?)\)', flags=re.A)

# An https URL on one of `hosts` ending in .mp4/.webm/.mov that appears in
# the text of a <p> before any other tag; group 1 is the paragraph text
# leading up to it, group 2 the URL.
video_sub_regex = re.compile(rf'(<p>[^<]*)(https:\/\/([a-z0-9-]+\.)*({hosts})\/[\w:~,()\-.#&\/=?@%;+]*?\.(mp4|webm|mov))', flags=re.A)

# Same as video_sub_regex but for audio extensions.
audio_sub_regex = re.compile(rf'(<p>[^<]*)(https:\/\/([a-z0-9-]+\.)*({hosts})\/[\w:~,()\-.#&\/=?@%;+]*?\.(mp3|wav|ogg|aac|m4a|flac))', flags=re.A)
|
2022-06-24 14:30:59 +00:00
|
|
|
|
|
|
|
# An i.imgur.com image URL (case-insensitive); group 1 is the URL without
# its extension, group 2 the extension.
imgur_regex = re.compile(r'(https:\/\/i\.imgur\.com\/[a-z0-9]+)\.(jpg|png|jpeg|webp)', flags=re.I|re.A)

# A media.giphy.com ".../giphy.gif" URL; group 1 is the URL without ".gif".
giphy_regex = re.compile(r'(https:\/\/media\.giphy\.com\/media\/[a-z0-9]+\/giphy)\.gif', flags=re.I|re.A)

# A youtube.com/watch?v=... URL in the text of a <p> before any other tag;
# group 1 is the preceding paragraph text, group 2 the full URL, group 3 the
# video id.
youtube_regex = re.compile(r'(<p>[^<]*)(https:\/\/youtube\.com\/watch\?v\=([a-z0-9-_]{5,20})[\w\-.#&/=\?@%+]*)', flags=re.I|re.A)

# 5-20 characters drawn from letters, digits, "-" and "_".
yt_id_regex = re.compile('[a-z0-9-_]{5,20}', flags=re.I|re.A)

# A standalone https image URL (delimited by whitespace or the string
# boundaries) ending in a known image extension, optionally followed by a
# query string; group 2 is the URL.
image_regex = re.compile(r"(^|\s)(https:\/\/[\w\-.#&/=\?@%;+,:]{5,250}(\.png|\.jpg|\.jpeg|\.gif|\.webp)(\?[\w\-.#&/=\?@%;+,:]*)?)($|\s)", flags=re.I|re.A)

# A markdown link "[text](target)" whose target starts with neither "http"
# nor "/"; group 1 is "[text](" and group 2 is "target)".
link_fix_regex = re.compile(r"(\[.*?\]\()(?!http|/)(.*?\))", flags=re.A)

# A CSS url(...) expression (case-insensitive); group 1 is the argument
# without optional surrounding quotes and whitespace.
css_url_regex = re.compile(r'''url\(\s*['"]?(.*?)['"]?\s*\)''', flags=re.I|re.A)
|
2022-06-24 14:30:59 +00:00
|
|
|
|
2022-10-05 22:47:12 +00:00
|
|
|
# Tuple of integer amounts in ascending order.
procoins_li = (0, 2500, 5000, 10000, 25000, 50000, 100000, 250000)

# A single newline with a non-newline character on each side; groups 1 and
# 2 are those neighbouring characters.
linefeeds_regex = re.compile("([^\n])\n([^\n])", flags=re.A)

# A line starting with ">" directly followed by something other than a
# space or another ">"; group 2 is the rest of that line.
greentext_regex = re.compile("(\n|^)>([^ >][^\n]*)", flags=re.A)

# A run of printable ASCII characters (space through "~").
ascii_only_regex = re.compile("[ -~]+", flags=re.A)

# "https://old.reddit.com/r/" or ".../u/" at the start of the text or right
# after ">" or a double quote. The dots are escaped so that they only match
# a literal "." (unescaped they matched any character, e.g. "oldXreddit.com").
reddit_to_vreddit_regex = re.compile(r'(^|>|")https:\/\/old\.reddit\.com\/(r|u)\/', flags=re.A)

# An http(s) link to /r/ or /u/ on reddit.com, one of its listed subdomains
# (a two-letter code optionally followed by "-" and another two-letter code,
# or beta/i/m/pay/ssl/www/new/alpha), libredd.it or teddit.net; group 2 is
# the host. The letter classes are [A-Za-z]: the former [A-z] range also
# matched the punctuation between "Z" and "a" ("[", "\", "]", "^", "_", "`").
reddit_domain_regex = re.compile(r"(^|\s|\()https?:\/\/(reddit\.com|(?:(?:[A-Za-z]{2})(?:-[A-Za-z]{2})?|beta|i|m|pay|ssl|www|new|alpha)\.reddit\.com|libredd\.it|teddit\.net)\/(r|u)\/", flags=re.A)
|
2022-06-24 14:30:59 +00:00
|
|
|
|
2022-06-24 17:32:31 +00:00
|
|
|
# Six lowercase letters or digits.
color_regex = re.compile("[a-z0-9]{6}", flags=re.A)

# Splits long HTML in two. Group 1 lazily takes at least 3000 characters up
# to the first following "</p>" (optionally followed by "</li></ul>");
# group 2 is the remainder, which must begin with optional whitespace and
# another "<p>" - so there is no match unless something trails the cut.
# The optional "</li></ul>" specifically accommodates the way Snappy formats
# its comments; other comments with the same markup are affected too.
showmore_regex = re.compile(r"^(.{3000,}?</p>(?:</li></ul>)?)(\s*<p>.*)", flags=re.A|re.DOTALL)

# Search tokens: either a double-quoted phrase (group 1, quotes stripped)
# or a run of non-whitespace (group 2).
search_token_regex = re.compile(r'"([^"]*)"|(\S+)', flags=re.A)

# "ref: refs/..." ; group 1 is the ref path.
git_regex = re.compile("ref: (refs/.+)", flags=re.A)

# "xx/yy" or "xx/yy/zz" where each part is 1-5 letters (case-insensitive);
# group 1 is the first part, group 2 the optional "/zz".
pronouns_regex = re.compile("([a-z]{1,5})/[a-z]{1,5}(/[a-z]{1,5})?", flags=re.A|re.I)

# One or more letters, digits, "_" or "-".
knowledgebase_page_regex = re.compile(r"[a-zA-Z0-9_\-]+", flags=re.A)
|
|
|
|
|
2022-06-24 14:30:59 +00:00
|
|
|
def sub_matcher(match, upper=False):
    """Return the replacement text for one regex match.

    A match that starts with "<" is returned unchanged. Any other match is
    looked up, lower-cased, in ``SLURS``.

    Args:
        match: The ``re.Match`` being substituted.
        upper: When true, the replacement is upper-cased unless it contains
            "<img".

    Returns:
        The string to substitute for the match.

    Raises:
        KeyError: If the lower-cased match text is not a key of ``SLURS``.
    """
    matched_text = match.group(0)
    if matched_text.startswith('<'):
        return matched_text

    replacement = SLURS[matched_text.lower()]
    # Replacements containing an <img tag are never upper-cased.
    if upper and "<img" not in replacement:
        return replacement.upper()
    return replacement
|
2022-06-24 14:30:59 +00:00
|
|
|
|
|
|
|
def sub_matcher_upper(match):
    """Single-argument form of ``sub_matcher`` with ``upper`` set to True.

    Having only the match parameter makes it usable directly as the
    replacement callback of ``Pattern.sub``.
    """
    return sub_matcher(match, True)
|
|
|
|
|
|
|
|
def censor_slurs(body, logged_user):
    """Apply the slur replacements to *body* when the viewer calls for it.

    Replacement happens when ``logged_user`` is falsy, equals the string
    'chat', or has a truthy ``slurreplacer`` attribute; otherwise *body* is
    returned untouched.

    Args:
        body: Text to filter.
        logged_user: The viewing user object, the string 'chat', or a falsy
            value.

    Returns:
        The filtered (or original) text.
    """
    wants_filter = not logged_user or logged_user == 'chat' or logged_user.slurreplacer
    if not wants_filter:
        return body

    # The case-sensitive upper-case pass runs first, then the
    # case-insensitive pass handles everything that is left.
    after_upper_pass = slur_regex_upper.sub(sub_matcher_upper, body)
    return slur_regex.sub(sub_matcher, after_upper_pass)
|
|
|
|
|
|
|
|
def torture_ap(body, username):
    """Rewrite first-person wording in *body* to refer to ``@username``.

    The text is processed line by line. Lines matched by
    ``torture_regex_exclude`` are left as they are. On every other line each
    key of ``AJ_REPLACEMENTS`` is replaced by its value, then the matches of
    ``torture_regex`` become "@username " and the matches of
    ``torture_regex2`` become "@username is " (the leading boundary captured
    by group 1 is kept in both cases).

    Args:
        body: The text to rewrite.
        username: Name inserted after the "@".

    Returns:
        The rewritten text with leading and trailing whitespace stripped.
    """
    rewritten = []
    # keepends=True preserves the original line terminators for the join.
    for line in body.splitlines(keepends=True):
        if not torture_regex_exclude.match(line):
            for old, new in AJ_REPLACEMENTS.items():
                line = line.replace(old, new)
            line = torture_regex.sub(rf'\1@{username} ', line)
            line = torture_regex2.sub(rf'\1@{username} is ', line)
        rewritten.append(line)

    return ''.join(rewritten).strip()
|
2022-07-11 12:14:18 +00:00
|
|
|
|
|
|
|
|
|
|
|
# Maps each lower-case "#command" name to the sequence a reply is drawn
# from (command_regex_matcher picks one element with random.choice).
commands = {
    "fortune": FORTUNE_REPLIES,
    "factcheck": FACTCHECK_REPLIES,
    "8ball": EIGHTBALL_REPLIES,
    # range() excludes its stop value, so rolls fall in 1..9998.
    # NOTE(review): confirm whether 9999 was meant to be reachable.
    "roll": range(1, 9999),
}
|
|
|
|
|
2022-07-13 13:06:59 +00:00
|
|
|
# "#fortune", "#factcheck", "#8ball" or "#roll" (case-insensitive) at the
# start of the text or right after whitespace. Group 1 is that leading
# boundary, group 2 the command name as typed. Raw string so that "\s" is a
# regex escape rather than an invalid string escape.
command_regex = re.compile(r"(\s|\n|^)#(fortune|factcheck|8ball|roll)", flags=re.A|re.I)
|
2022-07-11 12:14:18 +00:00
|
|
|
|
|
|
|
def command_regex_matcher(match, upper=False):
    """Return the replacement for one ``command_regex`` match.

    A random element of ``commands[<name>]`` is chosen and converted to a
    string. For the "roll" command the number is wrapped in a ``<b>`` tag
    with a random RGB colour.

    The command name is lower-cased once and used for both the lookup and
    the "roll" check. ``command_regex`` is case-insensitive, so without this
    a command typed as e.g. "#ROLL" was looked up correctly but then skipped
    the roll formatting.

    Args:
        match: Match whose group 1 is the leading boundary and group 2 the
            command name.
        upper: Unused; kept so existing callers keep working.

    Returns:
        Group 1 of the match followed by the generated reply.

    Raises:
        KeyError: If the lower-cased command name is not a key of
            ``commands``.
    """
    command = match.group(2).lower()
    result = str(choice(commands[command]))

    if command == 'roll':
        # Formatting the tuple directly yields "rgb(r, g, b)".
        color = tuple(choices(range(256), k=3))
        result = f'<b style="color:rgb{color}">Your roll: {result}</b>'

    return match.group(1) + result
|