From c6230e5f50e016746886ba95f8b51eccc0aad63c Mon Sep 17 00:00:00 2001 From: Aevann1 Date: Sat, 16 Apr 2022 16:58:07 +0200 Subject: [PATCH] fsd --- files/helpers/const.py | 10 +++++++--- files/helpers/sanitize.py | 18 ++++++------------ 2 files changed, 13 insertions(+), 15 deletions(-) diff --git a/files/helpers/const.py b/files/helpers/const.py index 31b98baf77..00a886671e 100644 --- a/files/helpers/const.py +++ b/files/helpers/const.py @@ -5,6 +5,7 @@ from json import loads from files.__main__ import db_session from files.classes.sub import Sub from files.classes.marsey import Marsey +import regex SITE = environ.get("DOMAIN", '').strip() SITE_NAME = environ.get("SITE_NAME", '').strip() @@ -713,7 +714,7 @@ imgur_regex = re.compile('(https://i\.imgur\.com/([a-z0-9]+))\.(jpg|png|jpeg|web reddit_regex = re.compile('(^|\s|

)\/?((r|u)\/(\w|-){3,25})', flags=re.A) sub_regex = re.compile('(^|\s|

)\/?(h\/(\w|-){3,25})', flags=re.A) -youtube_regex = re.compile('" target="_blank">(https:\/\/youtube\.com\/watch\?v\=([a-z0-9-_]{5,20})[\w\-.#&/=\?@%+]*)(?!)', flags=re.I|re.A) +youtube_regex = re.compile('(?)https:\/\/youtube\.com\/watch\?v\=([a-z0-9-_]{5,20})[\w\-.#&/=\?@%+]*', flags=re.I|re.A) yt_id_regex = re.compile('[a-z0-9-_]{5,20}', flags=re.I|re.A) strikethrough_regex = re.compile('''~{1,2}([^~]+)~{1,2}''', flags=re.A) @@ -732,8 +733,8 @@ email_regex = re.compile('([A-Za-z0-9]+[.-_])*[A-Za-z0-9]+@[A-Za-z0-9-]+(\.[A-Z| utm_regex = re.compile('utm_[a-z]+=[a-z0-9_]+&', flags=re.A) utm_regex2 = re.compile('[?&]utm_[a-z]+=[a-z0-9_]+', flags=re.A) -slur_regex = re.compile(f"(?]{0,255})({single_words})", flags=re.I|re.A) -slur_regex_upper = re.compile(f"(?]{0,255})({single_words.upper()})", flags=re.A) +slur_regex = regex.compile(f"(?]*)({single_words})", flags=regex.I|regex.A) +slur_regex_upper = regex.compile(f"(?]*)({single_words.upper()})", flags=regex.A) torture_regex = re.compile('(^|\s)(i|me) ', flags=re.I|re.A) torture_regex2 = re.compile("(^|\s)i'm ", flags=re.I|re.A) @@ -744,6 +745,9 @@ def sub_matcher_upper(match): return SLURS[match.group(0).lower()].upper() def censor_slurs(body, logged_user): + if '=":marseygigaretard:" data' in body: + for i in slur_regex.finditer(body): + print(i) if not logged_user or logged_user == 'chat' or logged_user.slurreplacer: body = slur_regex_upper.sub(sub_matcher_upper, body) body = slur_regex.sub(sub_matcher, body) diff --git a/files/helpers/sanitize.py b/files/helpers/sanitize.py index 433a20059b..0e4ab52353 100644 --- a/files/helpers/sanitize.py +++ b/files/helpers/sanitize.py @@ -158,24 +158,19 @@ def sanitize(sanitized, noimages=False, alert=False, comment=False, edit=False): captured = [] for i in youtube_regex.finditer(sanitized): - if i.group(0) in captured: continue - captured.append(i.group(0)) - - url = i.group(1) - yt_id = i.group(2).split('&')[0].split('%')[0] - if not yt_id_regex.fullmatch(yt_id): continue - - replacing = f'{url}' + url = i.group(0) + if url in captured: continue + captured.append(url) params = parse_qs(urlparse(url.replace('&','&')).query) t = params.get('t', params.get('start', [0]))[0] if isinstance(t, str): t = t.replace('s','') - htmlsource = f'' - sanitized = sanitized.replace(replacing, htmlsource) + sanitized = sanitized.replace(url, htmlsource) sanitized = unlinked_regex.sub(r'\1\2', sanitized) @@ -249,8 +244,7 @@ def sanitize(sanitized, noimages=False, alert=False, comment=False, edit=False): sanitized = bleach.Cleaner(tags=allowed_tags, attributes=allowed_attributes, protocols=['http', 'https'], - styles=['color', 'background-color', 'font-weight', 'text-align'], - filters=[partial(LinkifyFilter,skip_tags=["pre"],parse_email=False)] + styles=['color', 'background-color', 'font-weight', 'text-align'] ).clean(sanitized)