destroy the /s/ menace

master
Aevann 2023-09-08 21:08:02 +03:00
parent 9828c6781f
commit f84ba52d9c
2 changed files with 6 additions and 1 deletions

View File

@ -231,7 +231,8 @@ asset_image_link_regex = re.compile(f"https:\/\/(i\.)?{SITE}\/assets\/images\/[\
#sanitizing
# All patterns below are raw strings so that regex escapes such as \/ \w \s \.
# reach the regex engine verbatim instead of being parsed as (invalid) string
# escapes, which emit a SyntaxWarning on Python 3.12+.

# A bare mention like "r/name", "/u/name" (either letter case), where the name
# is 2-25 word characters or hyphens, not directly preceded by a word
# character or a slash. NOT_IN_CODE_OR_LINKS is defined elsewhere in the file
# and appended unchanged.
reddit_mention_regex = re.compile(r'(?<![\w/])\/?(([ruRU])\/(\w|-){2,25})' + NOT_IN_CODE_OR_LINKS, flags=re.A)

# A reddit link at the start of the text or after whitespace / "(".
# Group 1 is that leading delimiter, group 2 the host, group 3 the first path
# part ("u", or "comments" optionally prefixed by "r/<name>/").
# The locale-style subdomain uses [A-Za-z] rather than [A-z]: the latter also
# matches the six punctuation characters between "Z" and "a".
# NOTE(review): this name is bound again two statements below, so that later
# pattern is the one in effect when this span is read top to bottom. This
# looks like the old and new side of a diff shown together - confirm which
# definition is meant to survive.
reddit_domain_regex = re.compile(r"(^|\s|\()https?:\/\/(reddit\.com|(?:(?:[A-Za-z]{2})(?:-[A-Za-z]{2})?|beta|i|m|pay|ssl|www|new|alpha)\.reddit\.com|libredd\.it|reddit\.lol)\/(u|(r\/(\w|-){2,25}\/)?comments)\/", flags=re.A)

# A share link of the form https://reddit.com/r/<name>/s/<10 word chars>.
# The dot in the host is escaped so that only the literal "reddit.com" host
# matches; unescaped it would accept any character in that position.
reddit_s_regex = re.compile(r"https:\/\/reddit\.com\/r\/(\w|-){2,25}\/s\/\w{10}", flags=re.A)

# Same host alternatives as above, but group 3 is simply "r", "u" or
# "comments" as the first path part.
reddit_domain_regex = re.compile(r"(^|\s|\()https?:\/\/(reddit\.com|(?:(?:[A-Za-z]{2})(?:-[A-Za-z]{2})?|beta|i|m|pay|ssl|www|new|alpha)\.reddit\.com|libredd\.it|reddit\.lol)\/(r|u|comments)\/", flags=re.A)
#run-time
# An https://old.reddit.com/r/ or /u/ link at the start of the text or right
# after ">" or a double quote. Dots are escaped so only the literal host
# matches.
reddit_to_vreddit_regex = re.compile(r'(^|>|")https:\/\/old\.reddit\.com\/(r|u)\/', flags=re.A)

View File

@ -6,6 +6,7 @@ from functools import partial
from os import path, listdir
from urllib.parse import parse_qs, urlparse, unquote, ParseResult, urlencode, urlunparse
import time
import requests
from sqlalchemy.sql import func
@ -742,6 +743,9 @@ def is_whitelisted(domain, k):
def normalize_url(url):
url = unquote(url)
if reddit_s_regex.fullmatch(url):
url = requests.get(url, headers=HEADERS, timeout=1, proxies=proxies).url
url = reddit_domain_regex.sub(r'\1https://old.reddit.com/\3/', url)
url = url.replace("https://youtu.be/", "https://youtube.com/watch?v=") \