forked from MarseyWorld/MarseyWorld
destroy the /s/ menace
parent
9828c6781f
commit
f84ba52d9c
|
@ -231,7 +231,8 @@ asset_image_link_regex = re.compile(f"https:\/\/(i\.)?{SITE}\/assets\/images\/[\
|
|||
|
||||
#sanitizing
|
||||
# Matches "r/<name>" or "u/<name>" mentions (either case, optional leading slash) whose
# name is 2-25 word characters or hyphens, and which are not preceded by a word character
# or "/". NOT_IN_CODE_OR_LINKS is appended as-is to the pattern.
# Raw string: the pattern is unchanged, but "\w" and "\/" are no longer invalid string escapes.
reddit_mention_regex = re.compile(r'(?<![\w/])\/?(([ruRU])\/(\w|-){2,25})' + NOT_IN_CODE_OR_LINKS, flags=re.A)
|
||||
# Matches the scheme+host+leading path of a reddit-family link ("/u/", "/comments/" or
# "/r/<name>/comments/") at start of text, after whitespace, or after "(".
# Group 1 is that prefix, group 3 is the path kind.
# Language-code subdomains use [A-Za-z]; the former [A-z] range also matched "[\]^_`".
reddit_domain_regex = re.compile(r"(^|\s|\()https?:\/\/(reddit\.com|(?:(?:[A-Za-z]{2})(?:-[A-Za-z]{2})?|beta|i|m|pay|ssl|www|new|alpha)\.reddit\.com|libredd\.it|reddit\.lol)\/(u|(r\/(\w|-){2,25}\/)?comments)\/", flags=re.A)
|
||||
# Matches an https://reddit.com/r/<name>/s/<10 word chars> URL, where <name> is 2-25
# word characters or hyphens.
# The host dot is escaped so that look-alike hosts such as "redditXcom" do not match.
reddit_s_regex = re.compile(r"https:\/\/reddit\.com\/r\/(\w|-){2,25}\/s\/\w{10}", flags=re.A)
|
||||
# Matches the scheme+host+leading path ("/r/", "/u/" or "/comments/") of a reddit-family
# link at start of text, after whitespace, or after "(".
# Group 1 is that prefix, group 3 is the path kind.
# Language-code subdomains use [A-Za-z]; the former [A-z] range also matched "[\]^_`".
reddit_domain_regex = re.compile(r"(^|\s|\()https?:\/\/(reddit\.com|(?:(?:[A-Za-z]{2})(?:-[A-Za-z]{2})?|beta|i|m|pay|ssl|www|new|alpha)\.reddit\.com|libredd\.it|reddit\.lol)\/(r|u|comments)\/", flags=re.A)
|
||||
|
||||
#run-time
|
||||
# Matches an "https://old.reddit.com/r/" or ".../u/" prefix at start of text, after ">",
# or after a double quote. Group 1 is that prefix, group 2 is "r" or "u".
# Host dots are escaped so that only the literal host "old.reddit.com" matches.
reddit_to_vreddit_regex = re.compile(r'(^|>|")https:\/\/old\.reddit\.com\/(r|u)\/', flags=re.A)
|
||||
|
|
|
@ -6,6 +6,7 @@ from functools import partial
|
|||
from os import path, listdir
|
||||
from urllib.parse import parse_qs, urlparse, unquote, ParseResult, urlencode, urlunparse
|
||||
import time
|
||||
import requests
|
||||
|
||||
from sqlalchemy.sql import func
|
||||
|
||||
|
@ -742,6 +743,9 @@ def is_whitelisted(domain, k):
|
|||
def normalize_url(url):
|
||||
url = unquote(url)
|
||||
|
||||
if reddit_s_regex.fullmatch(url):
|
||||
url = requests.get(url, headers=HEADERS, timeout=1, proxies=proxies).url
|
||||
|
||||
url = reddit_domain_regex.sub(r'\1https://old.reddit.com/\3/', url)
|
||||
|
||||
url = url.replace("https://youtu.be/", "https://youtube.com/watch?v=") \
|
||||
|
|
Loading…
Reference in New Issue