Aevann1 2022-08-13 10:26:33 +02:00
parent 1e64d3c6b1
commit 2d9b42237e
3 changed files with 12 additions and 3 deletions

View File

@ -19,9 +19,8 @@ from math import floor
def normalize_urls_runtime(body, v):
	"""Rewrite embedded reddit/twitter links in *body* to the viewer's preferred frontends.

	When no viewer object *v* is supplied the text is returned untouched.
	Otherwise old.reddit links are pointed at ``v.reddit`` and, when the
	viewer opted in via ``v.nitter``, twitter links are pointed at nitter.
	"""
	if not v:
		return body
	# Swap plain old.reddit subreddit/user links for the viewer's chosen frontend.
	for section in ('/r/', '/u/'):
		body = body.replace(f"https://old.reddit.com{section}", f'https://{v.reddit}{section}')
	# Quoted / attribute-prefixed occurrences are handled by the regex,
	# but only when the target actually differs from old.reddit.com.
	if v.reddit != 'old.reddit.com':
		body = reddit_to_vreddit_regex.sub(rf'\1https://{v.reddit}/\2/', body)
	if v.nitter:
		body = twitter_to_nitter_regex.sub(r'https://nitter.42l.fr/\1', body)
	return body

View File

@ -90,6 +90,10 @@ def execute_snappy(post, v):
body += f"Snapshots:\n\n{rev}* [archive.org](https://web.archive.org/{newposturl})\n* [archive.ph](https://archive.ph/?url={quote(newposturl)}&run=1) (click to archive)\n* [ghostarchive.org](https://ghostarchive.org/search?term={quote(newposturl)}) (click to archive)\n\n"
gevent.spawn(archiveorg, newposturl)
if newposturl.startswith('https://twitter.com/'):
newposturl = newposturl.replace('https://twitter.com/', 'https://nitter.42l.fr/')
gevent.spawn(archiveorg, newposturl)
captured = []
body_for_snappy = post.body_html.replace(' data-src="', ' src="')
@ -124,6 +128,10 @@ def execute_snappy(post, v):
body += addition
gevent.spawn(archiveorg, href)
if href.startswith('https://twitter.com/'):
href = href.replace('https://twitter.com/', 'https://nitter.42l.fr/')
gevent.spawn(archiveorg, href)
body = body.strip()
body_html = sanitize(body)

View File

@ -92,6 +92,8 @@ ascii_only_regex = re.compile("[ -~]+", flags=re.A)
# Pre-compiled URL-rewriting patterns (re.A keeps \w/\d ASCII-only).
# NOTE(review): patterns are now raw strings — the previous plain strings carried
# invalid escapes (\/, \w, \d) that Python warns about and will eventually reject —
# and literal dots are escaped so "." can no longer match an arbitrary character
# (e.g. "twitterXcom" previously matched the twitter pattern).
twitter_to_nitter_regex = re.compile(r"https://twitter\.com/(\w{2,15}(/status/\d+[^/]*)?)", flags=re.A)
reddit_to_vreddit_regex = re.compile(r'([>"])https://old\.reddit\.com/(r|u)/', flags=re.A)
reddit_domain_regex = re.compile(r"(^|\s|\()https?://(reddit\.com|new\.reddit\.com|www\.reddit\.com|i\.reddit\.com|libredd\.it|teddit\.net)/(r|u)/", flags=re.A)
# Six lowercase hex-ish characters; presumably validated with fullmatch by callers — TODO confirm.
color_regex = re.compile("[a-z0-9]{6}", flags=re.A)