remotes/1693045480750635534/spooky-22
Aevann1 2022-03-17 20:38:14 +02:00
parent 7eef18c69d
commit 6a8fac96fc
1 changed file with 10 additions and 35 deletions

View File

@ -10,8 +10,6 @@ from json import loads, dump
from random import random, choice
import signal
import time
from urllib.parse import ParseResult, urlunparse, urlparse
allowed_tags = tags = ['b',
'blockquote',
@ -117,13 +115,7 @@ def sanitize(sanitized, noimages=False, alert=False, comment=False, edit=False):
sanitized = strikethrough_regex.sub(r'<del>\1</del>', sanitized)
sanitized = sanitized.replace("\ufeff", "").replace("𒐪","").replace("<script","").replace("script>","").replace('','').replace("https://youtu.be/", "https://youtube.com/watch?v=").replace("https://music.youtube.com/watch?v=", "https://youtube.com/watch?v=").replace("https://streamable.com/", "https://streamable.com/e/").replace("https://youtube.com/shorts/", "https://youtube.com/watch?v=").replace("https://mobile.twitter", "https://twitter").replace("https://m.facebook", "https://facebook").replace("m.wikipedia.org", "wikipedia.org").replace("https://m.youtube", "https://youtube").replace("https://www.youtube", "https://youtube").replace("old.reddit.com/gallery", "reddit.com/gallery")
if "https://youtube.com/watch?v=" in sanitized: sanitized = sanitized.replace("?t=", "&t=")
for rd in ["://reddit.com", "://new.reddit.com", "://www.reddit.com", "://redd.it", "://libredd.it"]:
sanitized = sanitized.replace(rd, "://old.reddit.com")
sanitized = sanitized.replace("\ufeff", "").replace("𒐪","").replace("<script","").replace("script>","").replace('','')
if alert:
captured = []
@ -203,32 +195,6 @@ def sanitize(sanitized, noimages=False, alert=False, comment=False, edit=False):
except: tag.string = ""
parsed_url = urlparse(tag.get("href"))
domain = parsed_url.netloc
if domain == 'old.reddit.com':
new_url = ParseResult(scheme="https",
netloc=parsed_url.netloc,
path=parsed_url.path,
params=parsed_url.params,
query=None,
fragment=parsed_url.fragment)
else:
qd = parse_qs(parsed_url.query)
filtered = {k: val for k, val in qd.items() if not k.startswith('utm_') and not k.startswith('ref_')}
new_url = ParseResult(scheme="https",
netloc=parsed_url.netloc,
path=parsed_url.path,
params=parsed_url.params,
query=urlencode(filtered, doseq=True),
fragment=parsed_url.fragment)
new_url = urlunparse(new_url)
if tag.string == tag["href"]: tag.string = new_url
tag["href"] = new_url
sanitized = str(soup)
@ -310,6 +276,10 @@ def sanitize(sanitized, noimages=False, alert=False, comment=False, edit=False):
sanitized = re.sub(f'(?<!"):{i.group(1).lower()}:', f'<img loading="lazy" data-bs-toggle="tooltip" alt=":{old}:" title=":{old}:" class="{classes}" src="/e/{emoji}.webp">', sanitized, flags=re.I|re.A)
if comment: marseys_used.add(emoji)
sanitized = sanitized.replace("https://youtu.be/", "https://youtube.com/watch?v=").replace("https://music.youtube.com/watch?v=", "https://youtube.com/watch?v=").replace("https://streamable.com/", "https://streamable.com/e/").replace("https://youtube.com/shorts/", "https://youtube.com/watch?v=").replace("https://mobile.twitter", "https://twitter").replace("https://m.facebook", "https://facebook").replace("m.wikipedia.org", "wikipedia.org").replace("https://m.youtube", "https://youtube").replace("https://www.youtube", "https://youtube")
if "https://youtube.com/watch?v=" in sanitized: sanitized = sanitized.replace("?t=", "&t=")
captured = []
for i in youtube_regex.finditer(sanitized):
if i.group(0) in captured: continue
@ -329,6 +299,11 @@ def sanitize(sanitized, noimages=False, alert=False, comment=False, edit=False):
sanitized = sanitized.replace(replacing, htmlsource)
for rd in ["://reddit.com", "://new.reddit.com", "://www.reddit.com", "://redd.it", "://libredd.it"]:
sanitized = sanitized.replace(rd, "://old.reddit.com")
sanitized = sanitized.replace("old.reddit.com/gallery", "reddit.com/gallery")
sanitized = unlinked_regex.sub(r'\1<a href="\2" rel="nofollow noopener noreferrer" target="_blank">\2</a>', sanitized)