fix reddit user /s/ links without having to visit them

pull/212/head
Aevann 2023-10-06 19:51:23 +03:00
parent a045d91305
commit 0cb0015127
3 changed files with 6 additions and 3 deletions

View File

@ -182,11 +182,12 @@ search_regex_3 = re.compile(r'\s+', flags=re.A)
#sanitizing
# Matches a reddit-style mention ("r/name", "/r/name", "u/name", "/u/name", either
# letter case) that is not preceded by a word character or a slash.
# Group 1 is the "<letter>/<name>" part, group 2 is the single kind letter.
# NOT_IN_CODE_OR_LINKS (defined elsewhere in this module) is appended unchanged.
# A raw string is used so the regex escapes are not parsed as (invalid) string
# escapes; the compiled pattern is identical to the non-raw spelling.
reddit_mention_regex = re.compile(r'(?<![\w/])\/?(([ruRU])\/[\w-]{2,25})' + NOT_IN_CODE_OR_LINKS, flags=re.A)
# Share link to a user's content: https://[www.]reddit.com/user/<name>/s/<10 chars>.
# Group 1 is the optional "www." prefix, group 2 is "user/<name>" (callers
# substitute with \2, so the group numbering must not change).
# Dots in host names are escaped so that look-alike hosts such as
# "wwwxreddit.com" are not treated as reddit.
reddit_s_url_user_regex = re.compile(r"https:\/\/(www\.)?reddit\.com\/(user\/\w{2,25})\/s\/\w{10}", flags=re.A)
# Reddit URL on any recognised host, at start of text or after whitespace / "(".
# Group 1 is that leading delimiter, group 2 the host, group 3 the path prefix
# ("user/", "[r/<sub>/]comments/" or a bare "r/<sub>" at end of text).
# The two-letter (optionally "xx-yy") subdomain uses [A-Za-z]; the range [A-z]
# would also accept the punctuation characters between "Z" and "a".
reddit_domain_regex = re.compile(r"(^|\s|\()https?:\/\/(reddit\.com|(?:(?:[A-Za-z]{2})(?:-[A-Za-z]{2})?|www|new)\.reddit\.com|libredd\.it|reddit\.lol)\/(user\/|(r\/\w{2,25}\/)?comments\/|r\/\w{2,25}\/?$)", flags=re.A)
# old.reddit.com link with exactly three path segments after "comments",
# optionally followed by a trailing slash.
reddit_comment_link_regex = re.compile(r"https:\/\/old\.reddit\.com\/r\/\w{2,25}\/comments(\/\w+){3}\/?", flags=re.A)
#gevent
# Share links of the form https://reddit.com/<kind>/<name>/s/<10 chars>.
# reddit_s_url_regex accepts both "r" and "u" as <kind>;
# reddit_s_url_sub_regex accepts only "r".
# The dot in the host name is escaped so only the literal "reddit.com" matches.
reddit_s_url_regex = re.compile(r"https:\/\/reddit\.com\/[ru]\/\w{2,25}\/s\/\w{10}", flags=re.A)
reddit_s_url_sub_regex = re.compile(r"https:\/\/reddit\.com\/r\/\w{2,25}\/s\/\w{10}", flags=re.A)
#run-time
# Matches "https://old.reddit.com/r/" or "https://old.reddit.com/user/" at the
# start of the text or directly after ">" or a double quote.
# Group 1 is that leading delimiter (possibly empty), group 2 is "r" or "user".
# Dots in the host name are escaped so only the literal host matches.
reddit_to_vreddit_regex = re.compile(r'(^|>|")https:\/\/old\.reddit\.com\/(r|user)\/', flags=re.A)

View File

@ -764,6 +764,8 @@ def normalize_url(url):
url = url.replace("reddit.com/u/", "reddit.com/user/")
url = reddit_s_url_user_regex.sub(r'https://old.reddit.com/\2', url)
url = reddit_domain_regex.sub(r'\1https://old.reddit.com/\3', url)
url = url.replace("https://music.youtube.com/watch?v=", "https://youtube.com/watch?v=") \

View File

@ -294,12 +294,12 @@ def surl_and_thumbnail_thread(post_url, post_body, post_body_html, pid, generate
#s_url
dirty = False
if post_url and reddit_s_url_regex.fullmatch(post_url):
if post_url and reddit_s_url_sub_regex.fullmatch(post_url):
post_url = reddit_s_url_cleaner(post_url)
dirty = True
if post_body:
for i in reddit_s_url_regex.finditer(post_body):
for i in reddit_s_url_sub_regex.finditer(post_body):
old = i.group(0)
new = reddit_s_url_cleaner(old)
post_body = post_body.replace(old, new)