diff --git a/files/helpers/const.py b/files/helpers/const.py index 31b98baf7..00a886671 100644 --- a/files/helpers/const.py +++ b/files/helpers/const.py @@ -5,6 +5,7 @@ from json import loads from files.__main__ import db_session from files.classes.sub import Sub from files.classes.marsey import Marsey +import regex SITE = environ.get("DOMAIN", '').strip() SITE_NAME = environ.get("SITE_NAME", '').strip() @@ -713,7 +714,7 @@ imgur_regex = re.compile('(https://i\.imgur\.com/([a-z0-9]+))\.(jpg|png|jpeg|web reddit_regex = re.compile('(^|\s|
)\/?((r|u)\/(\w|-){3,25})', flags=re.A) sub_regex = re.compile('(^|\s|
)\/?(h\/(\w|-){3,25})', flags=re.A)
-youtube_regex = re.compile('" target="_blank">(https:\/\/youtube\.com\/watch\?v\=([a-z0-9-_]{5,20})[\w\-.#&/=\?@%+]*)(?!)', flags=re.I|re.A)
+youtube_regex = re.compile('(?)https:\/\/youtube\.com\/watch\?v\=([a-z0-9-_]{5,20})[\w\-.#&/=\?@%+]*', flags=re.I|re.A)
yt_id_regex = re.compile('[a-z0-9-_]{5,20}', flags=re.I|re.A)
strikethrough_regex = re.compile('''~{1,2}([^~]+)~{1,2}''', flags=re.A)
@@ -732,8 +733,8 @@ email_regex = re.compile('([A-Za-z0-9]+[.-_])*[A-Za-z0-9]+@[A-Za-z0-9-]+(\.[A-Z|
utm_regex = re.compile('utm_[a-z]+=[a-z0-9_]+&', flags=re.A)
utm_regex2 = re.compile('[?&]utm_[a-z]+=[a-z0-9_]+', flags=re.A)
-slur_regex = re.compile(f"(?]{0,255})({single_words})", flags=re.I|re.A)
-slur_regex_upper = re.compile(f"(?]{0,255})({single_words.upper()})", flags=re.A)
+slur_regex = regex.compile(f"(?]*)({single_words})", flags=regex.I|regex.A)
+slur_regex_upper = regex.compile(f"(?]*)({single_words.upper()})", flags=regex.A)
torture_regex = re.compile('(^|\s)(i|me) ', flags=re.I|re.A)
torture_regex2 = re.compile("(^|\s)i'm ", flags=re.I|re.A)
@@ -744,6 +745,9 @@ def sub_matcher_upper(match):
return SLURS[match.group(0).lower()].upper()
def censor_slurs(body, logged_user):
+ if '=":marseygigaretard:" data' in body:
+ for i in slur_regex.finditer(body):
+ print(i)
if not logged_user or logged_user == 'chat' or logged_user.slurreplacer:
body = slur_regex_upper.sub(sub_matcher_upper, body)
body = slur_regex.sub(sub_matcher, body)
diff --git a/files/helpers/sanitize.py b/files/helpers/sanitize.py
index 433a20059..0e4ab5235 100644
--- a/files/helpers/sanitize.py
+++ b/files/helpers/sanitize.py
@@ -158,24 +158,19 @@ def sanitize(sanitized, noimages=False, alert=False, comment=False, edit=False):
captured = []
for i in youtube_regex.finditer(sanitized):
- if i.group(0) in captured: continue
- captured.append(i.group(0))
-
- url = i.group(1)
- yt_id = i.group(2).split('&')[0].split('%')[0]
- if not yt_id_regex.fullmatch(yt_id): continue
-
- replacing = f'{url}'
+ url = i.group(0)
+ if url in captured: continue
+ captured.append(url)
params = parse_qs(urlparse(url.replace('&','&')).query)
t = params.get('t', params.get('start', [0]))[0]
if isinstance(t, str): t = t.replace('s','')
- htmlsource = f'