From 9f505a17fdaaee8c7b332ea3414764fa00853012 Mon Sep 17 00:00:00 2001 From: Aevann Date: Fri, 15 Sep 2023 04:20:18 +0300 Subject: [PATCH] fix youtu.be links being jumbled like this https://rdrama.net/post/200792 --- files/helpers/sanitize.py | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/files/helpers/sanitize.py b/files/helpers/sanitize.py index d4ea78c21..c8c6922e2 100644 --- a/files/helpers/sanitize.py +++ b/files/helpers/sanitize.py @@ -744,8 +744,7 @@ def normalize_url(url): url = reddit_domain_regex.sub(r'\1https://old.reddit.com/\3', url) - url = url.replace("https://youtu.be/", "https://youtube.com/watch?v=") \ - .replace("https://music.youtube.com/watch?v=", "https://youtube.com/watch?v=") \ + url = url.replace("https://music.youtube.com/watch?v=", "https://youtube.com/watch?v=") \ .replace("https://www.youtube.com", "https://youtube.com") \ .replace("https://m.youtube.com", "https://youtube.com") \ .replace("https://youtube.com/shorts/", "https://youtube.com/watch?v=") \ @@ -779,14 +778,26 @@ def normalize_url(url): except: print(url, flush=True) abort(500) - domain = parsed_url.netloc + + netloc = parsed_url.netloc + path = parsed_url.path.rstrip('/') qd = parse_qs(parsed_url.query, keep_blank_values=True) - filtered = {k: val for k, val in qd.items() if is_whitelisted(domain, k)} - if domain == 'old.reddit.com' and reddit_comment_link_regex.fullmatch(url): + + filtered = {} + + if netloc == 'youtu.be': + filtered['v'] = path.lstrip('/') + netloc = 'youtube.com' + path = '/watch' + + filtered |= {k: val for k, val in qd.items() if is_whitelisted(netloc, k)} + + if netloc == 'old.reddit.com' and reddit_comment_link_regex.fullmatch(url): filtered['context'] = 8 + new_url = ParseResult(scheme="https", - netloc=parsed_url.netloc, - path=parsed_url.path.rstrip('/'), + netloc=netloc, + path=path, params=parsed_url.params, query=urlencode(filtered, doseq=True), fragment=parsed_url.fragment)