From 1c7458e1114aced74f9bbc5e34d82fbb46bebf14 Mon Sep 17 00:00:00 2001 From: TLSM Date: Wed, 25 May 2022 04:43:16 -0400 Subject: [PATCH] Sanitize: modularize normalize_url, fix streamable. Originally prompted by https://rdrama.net/post/18459/-/1984609, which pointed out that streamable.com/e/ links submitted as posts would have another e/ added to them. This happened in spite of logic in posts.py (api_is_repost and submit_post) designed specifically to prevent it. The proximate cause was a copy-pasted url.replace(...) chain that inserted the extra e/ before the streamable-specific check had a chance to run. Solution: remove the streamable replacement from the chained statement and create `normalize_url(url)` in files/helpers/sanitize.py to get rid of the copy-pasted code. --- files/helpers/sanitize.py | 30 +++++++++++++++++++++++++++++- files/routes/posts.py | 18 ++---------------- 2 files changed, 31 insertions(+), 17 deletions(-) diff --git a/files/helpers/sanitize.py b/files/helpers/sanitize.py index f93d68198..4818c3505 100644 --- a/files/helpers/sanitize.py +++ b/files/helpers/sanitize.py @@ -251,7 +251,7 @@ def sanitize(sanitized, alert=False, comment=False, edit=False): for rd in ["://reddit.com", "://new.reddit.com", "://www.reddit.com", "://redd.it", "://libredd.it", "://teddit.net"]: sanitized = sanitized.replace(rd, "://old.reddit.com") - sanitized = sanitized.replace("nitter.net", "twitter.com").replace("old.reddit.com/gallery", "reddit.com/gallery").replace("https://youtu.be/", "https://youtube.com/watch?v=").replace("https://music.youtube.com/watch?v=", "https://youtube.com/watch?v=").replace("https://streamable.com/", "https://streamable.com/e/").replace("https://youtube.com/shorts/", "https://youtube.com/watch?v=").replace("https://mobile.twitter", "https://twitter").replace("https://m.facebook", "https://facebook").replace("m.wikipedia.org", "wikipedia.org").replace("https://m.youtube", "https://youtube").replace("https://www.youtube", "https://youtube").replace("https://www.twitter", 
"https://twitter").replace("https://www.instagram", "https://instagram").replace("https://www.tiktok", "https://tiktok") + sanitized = normalize_url(sanitized) sanitized = sanitized.replace('&','&') @@ -380,3 +380,31 @@ def filter_emojis_only(title, edit=False, graceful=False): if len(title) > 1500 and not graceful: abort(400) else: return title + +def normalize_url(url): + url = url.replace("nitter.net", "twitter.com") \ + .replace("old.reddit.com/gallery", "reddit.com/gallery") \ + .replace("https://youtu.be/", "https://youtube.com/watch?v=") \ + .replace("https://music.youtube.com/watch?v=", "https://youtube.com/watch?v=") \ + .replace("https://youtube.com/shorts/", "https://youtube.com/watch?v=") \ + .replace("https://mobile.twitter", "https://twitter") \ + .replace("https://m.facebook", "https://facebook") \ + .replace("m.wikipedia.org", "wikipedia.org") \ + .replace("https://m.youtube", "https://youtube") \ + .replace("https://www.youtube", "https://youtube") \ + .replace("https://www.twitter", "https://twitter") \ + .replace("https://www.instagram", "https://instagram") \ + .replace("https://www.tiktok", "https://tiktok") + + if "/i.imgur.com/" in url: + url = url.replace(".png", ".webp").replace(".jpg", ".webp").replace(".jpeg", ".webp") + elif "/media.giphy.com/" in url or "/c.tenor.com/" in url: + url = url.replace(".gif", ".webp") + elif "/i.ibb.co/" in url: + url = url.replace(".png", ".webp").replace(".jpg", ".webp")\ + .replace(".jpeg", ".webp").replace(".gif", ".webp") + + if url.startswith("https://streamable.com/") and not url.startswith("https://streamable.com/e/"): + url = url.replace("https://streamable.com/", "https://streamable.com/e/") + + return url diff --git a/files/routes/posts.py b/files/routes/posts.py index 2ea151e03..d4f8d84e5 100644 --- a/files/routes/posts.py +++ b/files/routes/posts.py @@ -812,14 +812,7 @@ def api_is_repost(): for rd in ("://reddit.com", "://new.reddit.com", "://www.reddit.com", "://redd.it", "://libredd.it", 
"://teddit.net"): url = url.replace(rd, "://old.reddit.com") - url = url.replace("nitter.net", "twitter.com").replace("old.reddit.com/gallery", "reddit.com/gallery").replace("https://youtu.be/", "https://youtube.com/watch?v=").replace("https://music.youtube.com/watch?v=", "https://youtube.com/watch?v=").replace("https://streamable.com/", "https://streamable.com/e/").replace("https://youtube.com/shorts/", "https://youtube.com/watch?v=").replace("https://mobile.twitter", "https://twitter").replace("https://m.facebook", "https://facebook").replace("m.wikipedia.org", "wikipedia.org").replace("https://m.youtube", "https://youtube").replace("https://www.youtube", "https://youtube").replace("https://www.twitter", "https://twitter").replace("https://www.instagram", "https://instagram").replace("https://www.tiktok", "https://tiktok") - - if "/i.imgur.com/" in url: url = url.replace(".png", ".webp").replace(".jpg", ".webp").replace(".jpeg", ".webp") - elif "/media.giphy.com/" in url or "/c.tenor.com/" in url: url = url.replace(".gif", ".webp") - elif "/i.ibb.co/" in url: url = url.replace(".png", ".webp").replace(".jpg", ".webp").replace(".jpeg", ".webp").replace(".gif", ".webp") - - if url.startswith("https://streamable.com/") and not url.startswith("https://streamable.com/e/"): url = url.replace("https://streamable.com/", "https://streamable.com/e/") - + url = normalize_url(url) parsed_url = urlparse(url) domain = parsed_url.netloc @@ -908,14 +901,7 @@ def submit_post(v, sub=None): for rd in ("://reddit.com", "://new.reddit.com", "://www.reddit.com", "://redd.it", "://libredd.it", "://teddit.net"): url = url.replace(rd, "://old.reddit.com") - url = url.replace("nitter.net", "twitter.com").replace("old.reddit.com/gallery", "reddit.com/gallery").replace("https://youtu.be/", "https://youtube.com/watch?v=").replace("https://music.youtube.com/watch?v=", "https://youtube.com/watch?v=").replace("https://streamable.com/", 
"https://streamable.com/e/").replace("https://youtube.com/shorts/", "https://youtube.com/watch?v=").replace("https://mobile.twitter", "https://twitter").replace("https://m.facebook", "https://facebook").replace("m.wikipedia.org", "wikipedia.org").replace("https://m.youtube", "https://youtube").replace("https://www.youtube", "https://youtube").replace("https://www.twitter", "https://twitter").replace("https://www.instagram", "https://instagram").replace("https://www.tiktok", "https://tiktok") - - if "/i.imgur.com/" in url: url = url.replace(".png", ".webp").replace(".jpg", ".webp").replace(".jpeg", ".webp") - elif "/media.giphy.com/" in url or "/c.tenor.com/" in url: url = url.replace(".gif", ".webp") - elif "/i.ibb.co/" in url: url = url.replace(".png", ".webp").replace(".jpg", ".webp").replace(".jpeg", ".webp").replace(".gif", ".webp") - - if url.startswith("https://streamable.com/") and not url.startswith("https://streamable.com/e/"): url = url.replace("https://streamable.com/", "https://streamable.com/e/") - + url = normalize_url(url) parsed_url = urlparse(url) domain = parsed_url.netloc