the final solution to reddit /s/ links --- in posts only

pull/200/head
Aevann 2023-09-09 00:32:23 +03:00
parent bcab0ef9c3
commit ca9482b520
2 changed files with 37 additions and 3 deletions

View File

@ -233,5 +233,8 @@ asset_image_link_regex = re.compile(f"https:\/\/(i\.)?{SITE}\/assets\/images\/[\
# Reddit-style mentions: an optional leading slash, one of r/u/R/U, a slash,
# then 2-25 word characters or hyphens, provided the mention is not directly
# preceded by a word character or a slash. NOT_IN_CODE_OR_LINKS is defined
# elsewhere; presumably it rejects matches inside code or links (verify).
reddit_mention_regex = re.compile(
    r'(?<![\w/])\/?(([ruRU])\/(\w|-){2,25})' + NOT_IN_CODE_OR_LINKS,
    flags=re.A,
)
# Links to reddit users or comment threads at the start of the text or after
# whitespace / an opening parenthesis. Accepted hosts: reddit.com, a two-letter
# (optionally xx-yy) or well-known subdomain of reddit.com, libredd.it and
# reddit.lol.
# The subdomain letters are spelled [A-Za-z]: the range [A-z] also spans the
# ASCII punctuation between "Z" and "a" ([ \ ] ^ _ `), which let hosts such as
# "__.reddit.com" through. Capture-group numbering is unchanged.
reddit_domain_regex = re.compile(
    r"(^|\s|\()https?:\/\/"
    r"(reddit\.com"
    r"|(?:(?:[A-Za-z]{2})(?:-[A-Za-z]{2})?|beta|i|m|pay|ssl|www|new|alpha)\.reddit\.com"
    r"|libredd\.it|reddit\.lol)"
    r"\/(u|(r\/(\w|-){2,25}\/)?comments)\/",
    flags=re.A,
)
#gevent
# Reddit share links of the form https://reddit.com/r/<name>/s/<10 word chars>.
# The dot in the host is escaped so that only the literal "reddit.com" matches;
# unescaped it matched any character in that position, and matched URLs are
# fetched server-side.
reddit_s_url_regex = re.compile(r"https:\/\/reddit\.com\/r\/(\w|-){2,25}\/s\/\w{10}", flags=re.A)
#run-time
# https://old.reddit.com/r/ or /u/ links found at the start of the text or
# right after ">" or a double quote. Both dots in the host are escaped so only
# the literal "old.reddit.com" matches.
reddit_to_vreddit_regex = re.compile(r'(^|>|")https:\/\/old\.reddit\.com\/(r|u)\/', flags=re.A)

View File

@ -284,7 +284,38 @@ def expand_url(post_url, fragment_url):
else:
return f"{post_url}/{fragment_url}"
def thumbnail_thread(fetch_url, pid):
def reddit_s_url_cleaner(match):
    """Resolve a matched reddit ``/s/`` share link to the URL it leads to.

    Written to be used as the replacement callable of a regex ``sub`` call.

    Args:
        match: Regex match object; its full text (group 0) is the share link.

    Returns:
        ``normalize_url`` applied to the response's final URL, or the matched
        text unchanged when the HTTP request fails.
    """
    share_url = match.group(0)
    try:
        response = requests.get(share_url, headers=HEADERS, timeout=2, proxies=proxies)
    except requests.RequestException:
        # Best effort: a timeout or connection error on one link must not
        # raise out of the surrounding substitution, so keep the link as is.
        return share_url
    # response.url is the URL of the final response, after any redirects
    # that requests followed.
    return normalize_url(response.url)
def surl_and_thumbnail_thread(old_url, old_body_html, pid, generate_thumb):
fetch_url = old_url
#s_url
new_url = None
if old_url:
new_url = reddit_s_url_regex.sub(reddit_s_url_cleaner, old_url)
new_body_html = None
if old_body_html:
new_body_html = reddit_s_url_regex.sub(reddit_s_url_cleaner, old_body_html)
if old_url != new_url or old_body_html != new_body_html:
db = db_session()
p = db.query(Post).filter_by(id=pid).options(load_only(Post.id)).one_or_none()
p.url = new_url
fetch_url = p.url
p.body_html = new_body_html
db.commit()
db.close()
stdout.flush()
#thumbnail
if not generate_thumb: return
if fetch_url.startswith('/') and '\\' not in fetch_url:
fetch_url = f"{SITE_FULL}{fetch_url}"
@ -658,8 +689,8 @@ def submit_post(v, sub=None):
g.db.flush() #Necessary, do NOT remove
if not p.thumburl and p.url and p.domain != SITE:
gevent.spawn(thumbnail_thread, p.url, p.id)
generate_thumb = (not p.thumburl and p.url and p.domain != SITE)
gevent.spawn(surl_and_thumbnail_thread, p.url, p.body_html, p.id, generate_thumb)
if v.client: return p.json
else: