diff --git a/files/helpers/regex.py b/files/helpers/regex.py index b22d72e15..f965c5303 100644 --- a/files/helpers/regex.py +++ b/files/helpers/regex.py @@ -231,7 +231,8 @@ asset_image_link_regex = re.compile(f"https:\/\/(i\.)?{SITE}\/assets\/images\/[\ #sanitizing reddit_mention_regex = re.compile('(?|")https:\/\/old.reddit.com\/(r|u)\/', flags=re.A) diff --git a/files/helpers/sanitize.py b/files/helpers/sanitize.py index 205b04398..9b59e3306 100644 --- a/files/helpers/sanitize.py +++ b/files/helpers/sanitize.py @@ -6,6 +6,7 @@ from functools import partial from os import path, listdir from urllib.parse import parse_qs, urlparse, unquote, ParseResult, urlencode, urlunparse import time +import requests from sqlalchemy.sql import func @@ -742,6 +743,9 @@ def is_whitelisted(domain, k): def normalize_url(url): url = unquote(url) + if reddit_s_regex.fullmatch(url): + url = requests.get(url, headers=HEADERS, timeout=1, proxies=proxies).url + url = reddit_domain_regex.sub(r'\1https://old.reddit.com/\3/', url) url = url.replace("https://youtu.be/", "https://youtube.com/watch?v=") \