From c6230e5f50e016746886ba95f8b51eccc0aad63c Mon Sep 17 00:00:00 2001
From: Aevann1 <randomname42029@gmail.com>
Date: Sat, 16 Apr 2022 16:58:07 +0200
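Subject: [PATCH] Rework slur and YouTube regexes; drop LinkifyFilter

- const.py: compile slur_regex and slur_regex_upper with the third-party
  `regex` module and skip matches inside <a>, <img> and <video> tags
  instead of only after ` href=`.
- const.py: youtube_regex now matches the bare watch URL (not preceded by
  <code>) rather than the tail of an already-rendered anchor tag.
- const.py: censor_slurs prints slur_regex matches for bodies containing
  the `=":marseygigaretard:" data` marker.
- sanitize.py: build the lite-youtube embed directly from the matched URL
  and video id, and stop passing LinkifyFilter to bleach.Cleaner.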

---
 files/helpers/const.py    | 10 +++++++---
 files/helpers/sanitize.py | 18 ++++++------------
 2 files changed, 13 insertions(+), 15 deletions(-)
diff --git a/files/helpers/const.py b/files/helpers/const.py
index 31b98baf7..00a886671 100644
--- a/files/helpers/const.py
+++ b/files/helpers/const.py
@@ -5,6 +5,7 @@ from json import loads
 from files.__main__ import db_session
 from files.classes.sub import Sub
 from files.classes.marsey import Marsey
+import regex
 
 SITE = environ.get("DOMAIN", '').strip()
 SITE_NAME = environ.get("SITE_NAME", '').strip()
@@ -713,7 +714,7 @@ imgur_regex = re.compile('(https://i\.imgur\.com/([a-z0-9]+))\.(jpg|png|jpeg|web
 reddit_regex = re.compile('(^|\s|<p>)\/?((r|u)\/(\w|-){3,25})', flags=re.A)
 sub_regex = re.compile('(^|\s|<p>)\/?(h\/(\w|-){3,25})', flags=re.A)
 
-youtube_regex = re.compile('" target="_blank">(https:\/\/youtube\.com\/watch\?v\=([a-z0-9-_]{5,20})[\w\-.#&/=\?@%+]*)</a>(?!</code>)', flags=re.I|re.A)
+youtube_regex = re.compile('(?<!<code>)https:\/\/youtube\.com\/watch\?v\=([a-z0-9-_]{5,20})[\w\-.#&/=\?@%+]*', flags=re.I|re.A)
 yt_id_regex = re.compile('[a-z0-9-_]{5,20}', flags=re.I|re.A)
 
 strikethrough_regex = re.compile('''~{1,2}([^~]+)~{1,2}''', flags=re.A)
@@ -732,8 +733,8 @@ email_regex = re.compile('([A-Za-z0-9]+[.-_])*[A-Za-z0-9]+@[A-Za-z0-9-]+(\.[A-Z|
 utm_regex = re.compile('utm_[a-z]+=[a-z0-9_]+&', flags=re.A)
 utm_regex2 = re.compile('[?&]utm_[a-z]+=[a-z0-9_]+', flags=re.A)
 
-slur_regex = re.compile(f"(?<! href=[^>]{0,255})({single_words})", flags=re.I|re.A)
-slur_regex_upper = re.compile(f"(?<! href=[^>]{0,255})({single_words.upper()})", flags=re.A)
+slur_regex = regex.compile(f"(?<!<(a|img|video) [^>]*)({single_words})", flags=regex.I|regex.A)
+slur_regex_upper = regex.compile(f"(?<!<(a|img|video) [^>]*)({single_words.upper()})", flags=regex.A)
 torture_regex = re.compile('(^|\s)(i|me) ', flags=re.I|re.A)
 torture_regex2 = re.compile("(^|\s)i'm ", flags=re.I|re.A)
 
@@ -744,6 +745,9 @@ def sub_matcher_upper(match):
 	return SLURS[match.group(0).lower()].upper()
 
 def censor_slurs(body, logged_user):
+	if '=":marseygigaretard:" data' in body:
+		for i in slur_regex.finditer(body):
+			print(i)
 	if not logged_user or logged_user == 'chat' or logged_user.slurreplacer:
 		body = slur_regex_upper.sub(sub_matcher_upper, body)
 		body = slur_regex.sub(sub_matcher, body)
diff --git a/files/helpers/sanitize.py b/files/helpers/sanitize.py
index 433a20059..0e4ab5235 100644
--- a/files/helpers/sanitize.py
+++ b/files/helpers/sanitize.py
@@ -158,24 +158,19 @@ def sanitize(sanitized, noimages=False, alert=False, comment=False, edit=False):
 
 	captured = []
 	for i in youtube_regex.finditer(sanitized):
-		if i.group(0) in captured: continue
-		captured.append(i.group(0))
-
-		url = i.group(1)
-		yt_id = i.group(2).split('&')[0].split('%')[0]
-		if not yt_id_regex.fullmatch(yt_id): continue
-
-		replacing = f'<a href="{url}" rel="nofollow noopener noreferrer" target="_blank">{url}</a>'
+		url = i.group(0)
+		if url in captured: continue
+		captured.append(url)
 
 		params = parse_qs(urlparse(url.replace('&amp;','&')).query)
 		t = params.get('t', params.get('start', [0]))[0]
 		if isinstance(t, str): t = t.replace('s','')
 
-		htmlsource = f'<lite-youtube videoid="{yt_id}" params="autoplay=1&modestbranding=1'
+		htmlsource = f'<lite-youtube videoid="{i.group(1)}" params="autoplay=1&modestbranding=1'
 		if t: htmlsource += f'&start={t}'
 		htmlsource += '"></lite-youtube>'
 
-		sanitized = sanitized.replace(replacing, htmlsource)
+		sanitized = sanitized.replace(url, htmlsource)
 
 
 	sanitized = unlinked_regex.sub(r'\1<a href="\2" rel="nofollow noopener noreferrer" target="_blank">\2</a>', sanitized)
@@ -249,8 +244,7 @@ def sanitize(sanitized, noimages=False, alert=False, comment=False, edit=False):
 	sanitized = bleach.Cleaner(tags=allowed_tags,
 								attributes=allowed_attributes,
 								protocols=['http', 'https'],
-								styles=['color', 'background-color', 'font-weight', 'text-align'],
-								filters=[partial(LinkifyFilter,skip_tags=["pre"],parse_email=False)]
+								styles=['color', 'background-color', 'font-weight', 'text-align']
 								).clean(sanitized)