From 654ddc4157e8719da16f3c264a24873ecf91c048 Mon Sep 17 00:00:00 2001 From: Aevann1 Date: Fri, 10 Jun 2022 22:02:15 +0200 Subject: [PATCH] re-refactor normalize_url --- files/helpers/const.py | 3 ++- files/helpers/sanitize.py | 29 ++++++++--------------------- files/routes/posts.py | 6 ------ 3 files changed, 10 insertions(+), 28 deletions(-) diff --git a/files/helpers/const.py b/files/helpers/const.py index dafd65698..965817105 100644 --- a/files/helpers/const.py +++ b/files/helpers/const.py @@ -1031,7 +1031,8 @@ image_check_regex = re.compile(f'!\[\]\(((?!(https:\/\/([a-z0-9-]+\.)*({hosts})\ video_sub_regex = re.compile(f'(

[^<]*)(https:\/\/([a-z0-9-]+\.)*({hosts})\/[\w:~,()\-.#&\/=?@%;+]*?\.(mp4|webm|mov))', flags=re.A) audio_sub_regex = re.compile(f'(

[^<]*)(https:\/\/([a-z0-9-]+\.)*({hosts})\/[\w:~,()\-.#&\/=?@%;+]*?\.(mp3|wav|ogg|aac|m4a|flac))', flags=re.A) -imgur_regex = re.compile('(https:\/\/i\.imgur\.com\/([a-z0-9]+))\.(jpg|png|jpeg|webp)(?!<\/(code|pre|a)>)', flags=re.I|re.A) +imgur_regex = re.compile('(https:\/\/i\.imgur\.com\/[a-z0-9]+)\.(jpg|png|jpeg|webp)(?!<\/(code|pre|a)>)', flags=re.I|re.A) +giphy_tenor_regex = re.compile('(https:\/\/(media\.giphy\.com|c\.tenor\.com)\/[\/\-a-z0-9]+)\.gif(?!<\/(code|pre|a)>)', flags=re.I|re.A) youtube_regex = re.compile('(

[^<]*)(https:\/\/youtube\.com\/watch\?v\=([a-z0-9-_]{5,20})[\w\-.#&/=\?@%+]*)', flags=re.I|re.A) diff --git a/files/helpers/sanitize.py b/files/helpers/sanitize.py index 58e55085f..55ba04faf 100644 --- a/files/helpers/sanitize.py +++ b/files/helpers/sanitize.py @@ -201,7 +201,7 @@ def sanitize(sanitized, alert=False, comment=False, edit=False): sanitized = sanitized.replace(i.group(0), f'''{i.group(1)}@{u.username}''', 1) - sanitized = imgur_regex.sub(r'\1_d.webp?maxwidth=9999&fidelity=high', sanitized) + sanitized = normalize_url(sanitized) soup = BeautifulSoup(sanitized, 'lxml') @@ -245,11 +245,6 @@ def sanitize(sanitized, alert=False, comment=False, edit=False): sanitized = render_emoji(sanitized, emoji_regex2, edit, marseys_used) - for rd in ["://reddit.com", "://new.reddit.com", "://www.reddit.com", "://redd.it", "://libredd.it", "://teddit.net"]: - sanitized = sanitized.replace(rd, "://old.reddit.com") - - sanitized = sanitize_url(sanitized) - sanitized = sanitized.replace('&','&') if "https://youtube.com/watch?v=" in sanitized: sanitized = sanitized.replace("?t=", "&t=") @@ -378,8 +373,10 @@ def filter_emojis_only(title, edit=False, graceful=False): if len(title) > 1500 and not graceful: abort(400) else: return title -def sanitize_url(url): - # NB: Used in this file to sanitize all URLs in bulk text. +def normalize_url(url): + for x in ["://reddit.com", "://new.reddit.com", "://www.reddit.com", "://redd.it", "://libredd.it", "://teddit.net"]: + url = url.replace(x, "://old.reddit.com") + url = url.replace("nitter.net", "twitter.com") \ .replace("old.reddit.com/gallery", "reddit.com/gallery") \ .replace("https://youtu.be/", "https://youtube.com/watch?v=") \ @@ -397,17 +394,7 @@ def sanitize_url(url): .replace("https://streamable.com/", "https://streamable.com/e/") \ .replace("https://streamable.com/e/e/", "https://streamable.com/e/") - return url + url = imgur_regex.sub(r'\1_d.webp?maxwidth=9999&fidelity=high', url) + url = giphy_tenor_regex.sub(r'\1.webp', url) -def normalize_url(url): - url = sanitize_url(url) - - if "/i.imgur.com/" in url: - url = url.replace(".png", ".webp").replace(".jpg", ".webp").replace(".jpeg", ".webp") - elif "/media.giphy.com/" in url or "/c.tenor.com/" in url: - url = url.replace(".gif", ".webp") - elif "/i.ibb.co/" in url: - url = url.replace(".png", ".webp").replace(".jpg", ".webp")\ - .replace(".jpeg", ".webp").replace(".gif", ".webp") - - return url + return url \ No newline at end of file diff --git a/files/routes/posts.py b/files/routes/posts.py index bf8f33683..803105c79 100644 --- a/files/routes/posts.py +++ b/files/routes/posts.py @@ -728,9 +728,6 @@ def api_is_repost(): url = request.values.get('url') if not url: abort(400) - for rd in ("://reddit.com", "://new.reddit.com", "://www.reddit.com", "://redd.it", "://libredd.it", "://teddit.net"): - url = url.replace(rd, "://old.reddit.com") - url = normalize_url(url) parsed_url = urlparse(url) @@ -819,9 +816,6 @@ def submit_post(v, sub=None): embed = None if url: - for rd in ("://reddit.com", "://new.reddit.com", "://www.reddit.com", "://redd.it", "://libredd.it", "://teddit.net"): - url = url.replace(rd, "://old.reddit.com") - url = normalize_url(url) parsed_url = urlparse(url)