diff --git a/files/classes/submission.py b/files/classes/submission.py index d24081541..abedf3db6 100644 --- a/files/classes/submission.py +++ b/files/classes/submission.py @@ -473,12 +473,12 @@ class Submission(Base): @property @lazy def is_video(self): - return self.url and any((self.url.lower().endswith(x) for x in ('.mp4','.webm','.mov'))) and video_regex.fullmatch(self.url) + return self.url and any((self.url.lower().endswith(x) for x in ('.mp4','.webm','.mov'))) and embed_fullmatch_regex.fullmatch(self.url) @property @lazy def is_image(self): - if self.url and (self.url.lower().endswith('.webp') or self.url.lower().endswith('.jpg') or self.url.lower().endswith('.png') or self.url.lower().endswith('.gif') or self.url.lower().endswith('.jpeg') or self.url.lower().endswith('?maxwidth=9999') or self.url.lower().endswith('&fidelity=high')) and (self.url.startswith('/') or self.url.startswith(f'{SITE_FULL}/') or embed_check_regex.fullmatch(self.url)): + if self.url and (self.url.lower().endswith('.webp') or self.url.lower().endswith('.jpg') or self.url.lower().endswith('.png') or self.url.lower().endswith('.gif') or self.url.lower().endswith('.jpeg') or self.url.lower().endswith('?maxwidth=9999') or self.url.lower().endswith('&fidelity=high')) and (self.url.startswith('/') or self.url.startswith(f'{SITE_FULL}/') or embed_fullmatch_regex.fullmatch(self.url)): return True return False diff --git a/files/helpers/const.py b/files/helpers/const.py index e863ea11b..17cb4bc39 100644 --- a/files/helpers/const.py +++ b/files/helpers/const.py @@ -702,10 +702,6 @@ spoiler_regex = re.compile('''\|\|(.+)\|\|''', flags=re.A) reddit_regex = re.compile('(^|\s|
)\/?((r|u)\/(\w|-){3,25})', flags=re.A) sub_regex = re.compile('(^|\s|
)\/?(h\/(\w|-){3,25})', flags=re.A) -imgur_regex = re.compile('(https://i\.imgur\.com/([a-z0-9]+))\.(jpg|png|jpeg|webp)(?!<\/(code|pre|a)>)', flags=re.I|re.A) -youtube_regex = regex.compile('(?)https:\/\/youtube\.com\/watch\?v\=([a-z0-9-_]{5,20})[\w\-.#&/=\?@%+]*', flags=regex.I|regex.A) -yt_id_regex = re.compile('[a-z0-9-_]{5,20}', flags=re.I|re.A) - strikethrough_regex = re.compile('''~{1,2}([^~]+)~{1,2}''', flags=re.A) mute_regex = re.compile("/mute @([a-z0-9_\-]{3,25}) ([0-9])+", flags=re.A) @@ -811,6 +807,12 @@ hosts = "|".join(approved_embed_hosts).replace('.','\.') image_check_regex = re.compile(f'!\[\]\(((?!(https:\/\/([a-z0-9-]+\.)*({hosts})\/|\/images\/)).*?)\)', flags=re.A) -embed_check_regex = regex.compile(f'(?)https:\/\/([a-z0-9-]+\.)*({hosts})\/[\w:~,()\-.#&\/=?@%;+]*', flags=regex.A) +embed_fullmatch_regex = re.compile(f'https:\/\/([a-z0-9-]+\.)*({hosts})\/[\w:~,()\-.#&\/=?@%;+]*', flags=re.A) -video_regex = regex.compile(f'((?)https:\/\/([a-z0-9-]+\.)*({hosts})\/[\w:~,()\-.#&\/=?@%;+]*?\.(mp4|webm|mov))', flags=regex.A) \ No newline at end of file +video_sub_regex = re.compile(f'(
[^<]*)(https:\/\/([a-z0-9-]+\.)*({hosts})\/[\w:~,()\-.#&\/=?@%;+]*?\.(mp4|webm|mov))', flags=re.A) + +imgur_regex = re.compile('(https://i\.imgur\.com/([a-z0-9]+))\.(jpg|png|jpeg|webp)(?!<\/(code|pre|a)>)', flags=re.I|re.A) + +youtube_regex = re.compile('(
[^<]*)(https:\/\/youtube\.com\/watch\?v\=([a-z0-9-_]{5,20})[\w\-.#&/=\?@%+]*)', flags=re.I|re.A)
+
+yt_id_regex = re.compile('[a-z0-9-_]{5,20}', flags=re.I|re.A)
\ No newline at end of file
diff --git a/files/helpers/sanitize.py b/files/helpers/sanitize.py
index a251b6426..6965da360 100644
--- a/files/helpers/sanitize.py
+++ b/files/helpers/sanitize.py
@@ -37,7 +37,7 @@ def allowed_attributes(tag, name, value):
if tag == 'img':
if name in ['src','data-src']:
- if value.startswith('/') or value.startswith(f'{SITE_FULL}/') or embed_check_regex.fullmatch(value): return True
+ if value.startswith('/') or value.startswith(f'{SITE_FULL}/') or embed_fullmatch_regex.fullmatch(value): return True
else: return False
if name == 'loading' and value == 'lazy': return True
@@ -57,7 +57,7 @@ def allowed_attributes(tag, name, value):
return False
if tag == 'source':
- return True
+ if name == 'src' and embed_fullmatch_regex.fullmatch(value): return True
return False
if tag == 'p':
@@ -215,21 +215,20 @@ def sanitize(sanitized, alert=False, comment=False, edit=False):
captured = []
for i in youtube_regex.finditer(sanitized):
- url = i.group(0)
- if url in captured: continue
- captured.append(url)
+ if i.group(0) in captured: continue
+ captured.append(i.group(0))
- params = parse_qs(urlparse(url.replace('&','&')).query)
+ params = parse_qs(urlparse(i.group(2).replace('&','&')).query)
t = params.get('t', params.get('start', [0]))[0]
if isinstance(t, str): t = t.replace('s','')
- htmlsource = f'