remotes/1693045480750635534/spooky-22
Aevann1 2022-04-17 22:20:40 +02:00
parent 2de411fe23
commit 78ea56837f
8 changed files with 120 additions and 86 deletions

View File

@ -473,13 +473,14 @@ class Submission(Base):
# Truthy when `self.url` is set and ends (case-insensitively) with '.mp4', '.webm' or '.mov'.
# The result is not coerced to bool: it is whatever the `and` chain evaluates to.
# NOTE(review): two variants of the return statement appear back to back below; only the
# second one additionally requires `video_regex.fullmatch(self.url)` to succeed.
@property
@lazy
def is_video(self):
return self.url and any((self.url.lower().endswith(x) for x in ('.mp4','.webm','.mov')))
return self.url and any((self.url.lower().endswith(x) for x in ('.mp4','.webm','.mov'))) and video_regex.fullmatch(self.url)
# Reports whether `self.url` looks like an image link: it must end (case-insensitively) with
# '.webp', '.jpg', '.png', '.gif', '.jpeg', '?maxwidth=9999' or '&fidelity=high'.
# NOTE(review): two variants of the body appear back to back below. The first (`if ...: return ...`
# / `else: return False`) checks the suffix only; the second additionally requires
# `embed_check_regex.fullmatch(self.url)` and returns a plain True/False.
@property
@lazy
def is_image(self):
if self.url: return self.url.lower().endswith('.webp') or self.url.lower().endswith('.jpg') or self.url.lower().endswith('.png') or self.url.lower().endswith('.gif') or self.url.lower().endswith('.jpeg') or self.url.lower().endswith('?maxwidth=9999') or self.url.lower().endswith('&fidelity=high')
else: return False
if self.url and (self.url.lower().endswith('.webp') or self.url.lower().endswith('.jpg') or self.url.lower().endswith('.png') or self.url.lower().endswith('.gif') or self.url.lower().endswith('.jpeg') or self.url.lower().endswith('?maxwidth=9999') or self.url.lower().endswith('&fidelity=high')) and embed_check_regex.fullmatch(self.url):
return True
return False
@lazy
def active_flags(self, v):
    """Return the number of items produced by `self.flags(v)`."""
    current_flags = self.flags(v)
    return len(current_flags)

View File

@ -692,10 +692,6 @@ poll_regex = re.compile("\s*\$\$([^\$\n]+)\$\$\s*", flags=re.A)
# All patterns below are raw strings so that regex escapes such as \s, \$ and \[
# are not parsed as (invalid) Python string escapes.

# `$$$text$$$` with optional surrounding whitespace; the text may not contain `$` or a newline.
bet_regex = re.compile(r"\s*\$\$\$([^\$\n]+)\$\$\$\s*", flags=re.A)
# `&&text&&` with optional surrounding whitespace; the text may not contain `$` or a newline.
choice_regex = re.compile(r"\s*&&([^\$\n]+)&&\s*", flags=re.A)
# Markdown embed with empty alt text, `![](target)`; group 1 is the target.
embed_removing_regex = re.compile(r'!\[\]\((.*?)\)', flags=re.A)
# `![](target)` whose target does NOT contain a .png/.jpg/.jpeg/.gif/.webp extension
# before any `?` or newline (negative lookahead); group 1 is the target.
image_check_regex = re.compile(r'!\[\]\(((?![^?\n]*\.(png|jpg|jpeg|gif|webp)).*?)\)', flags=re.I|re.A)
# Any character that is neither an ASCII word character nor a space.
title_regex = re.compile(r"[^\w ]", flags=re.A)
# "based and X-pilled" / "based and X pilled"; group 1 is X (1 to 20 characters, lazy).
based_regex = re.compile(r"based and (.{1,20}?)(-| )pilled", flags=re.I|re.A)
@ -705,13 +701,11 @@ controversial_regex = re.compile('["> ](https:\/\/old\.reddit\.com/r/[a-zA-Z0-9_
# Raw strings keep regex escapes (\S, \|, \/, \w, ...) from being parsed as
# invalid Python string escapes; the compiled patterns are unchanged.

# Any http:// or https:// URL up to the next whitespace.
fishylinks_regex = re.compile(r"https?://\S+", flags=re.A)
# `||text||`; group 1 is the text between the double pipes.
spoiler_regex = re.compile(r'''\|\|(.+)\|\|''', flags=re.A)
# A paragraph consisting of one auto-generated link whose href and text are https URLs
# ending in .mp4/.webm/.mov; group 1 is the href.
video_regex = re.compile(r'<p><a href="(https:\/\/[\w\-.#&/=\?@%;+]{5,250}\.(mp4|webm|mov))" rel="nofollow noopener noreferrer" target="_blank">(https:\/\/[\w\-.#&/=\?@%;+]{5,250}\.(mp4|webm|mov))<\/a><\/p>', flags=re.I|re.A)
# A bare https URL at line start, after whitespace, or right after `<p>`; group 2 is the URL.
unlinked_regex = re.compile(r'''(^|\s|<p>)(https:\/\/[\w\-.#&/=\?@%;+]{5,250})''', flags=re.A)
# i.imgur.com image link not immediately followed by `</code>` or `</pre>`;
# group 1 is the URL without extension, group 3 the extension.
imgur_regex = re.compile(r'(https://i\.imgur\.com/([a-z0-9]+))\.(jpg|png|jpeg|webp)(?!</(code|pre)>)', flags=re.I|re.A)
# `r/name` or `u/name` (optionally with a leading slash); group 2 is the mention.
reddit_regex = re.compile(r'(^|\s|<p>)\/?((r|u)\/(\w|-){3,25})', flags=re.A)
# `h/name` (optionally with a leading slash); group 2 is the mention.
sub_regex = re.compile(r'(^|\s|<p>)\/?(h\/(\w|-){3,25})', flags=re.A)
# Raw strings keep regex escapes (\/, \., \?, \=, \w, \-) from being parsed as
# invalid Python string escapes; the compiled patterns are unchanged.
# NOTE(review): `youtube_regex` is assigned twice in this span; the later assignment also
# excludes matches directly preceded by `<a>`.
youtube_regex = regex.compile(r'(?<!<(code|pre)>)https:\/\/youtube\.com\/watch\?v\=([a-z0-9-_]{5,20})[\w\-.#&/=\?@%+]*', flags=regex.I|regex.A)
# i.imgur.com image link not immediately followed by `</code>`, `</pre>` or `</a>`.
imgur_regex = re.compile(r'(https://i\.imgur\.com/([a-z0-9]+))\.(jpg|png|jpeg|webp)(?!<\/(code|pre|a)>)', flags=re.I|re.A)
youtube_regex = regex.compile(r'(?<!<(code|pre|a)>)https:\/\/youtube\.com\/watch\?v\=([a-z0-9-_]{5,20})[\w\-.#&/=\?@%+]*', flags=regex.I|regex.A)
# 5 to 20 characters drawn from letters, digits, `-` and `_` (case-insensitive).
yt_id_regex = re.compile(r'[a-z0-9-_]{5,20}', flags=re.I|re.A)
# `~text~` or `~~text~~`; group 1 is the text between the tildes.
strikethrough_regex = re.compile(r'''~{1,2}([^~]+)~{1,2}''', flags=re.A)
@ -760,4 +754,47 @@ ADMIGGERS = (37696,37697,37749,37833,37838)
# Both http and https traffic are routed through the same local proxy endpoint.
proxies = {"http":"http://127.0.0.1:18080","https":"http://127.0.0.1:18080"}
# Value of the BLACKJACK environment variable, stripped; empty string when unset.
# (The assignment appeared twice verbatim; one copy is sufficient.)
blackjack = environ.get("BLACKJACK", "").strip()
# Hosts that media may be embedded from.
# Each entry is a regular-expression fragment, not a plain hostname: the list is
# joined with "|" and spliced into compiled patterns, so every dot must be escaped
# (an unescaped "." would also match look-alike hosts such as "iximgur.com").
# Raw strings keep "\." from being read as an invalid Python string escape.
approved_embed_hosts = [
    r'i\.imgur\.com',
    r'i\.ibb\.co',
    r'pomf2\.lain\.la',
    r'pngfind\.com',
    r'i\.kym-cdn\.com',
    r'i2\.kym-cdn\.com',
    r'i\.redd\.it',
    r'cdn\.substack\.com',
    r'cdn\.discordapp\.com',
    r'2\.bp\.blogspot\.com',
    r'files\.catbox\.moe',
    r'i\.pinimg\.com',
    r'kindpng\.com',
    r'cdn\.shopify\.com',
    r'media\.discordapp\.net',
    r'pbs\.twimg\.com',
    r'upload\.wikimedia\.org',
    r'i0\.wp\.com',
    r'seekpng\.com',
    r'i\.dailymail\.co\.uk',
    r'de\.catbox\.moe',
    r'www\.cdc\.gov',
    r'm\.media-amazon\.com',
    r'www\.washingtonpost\.com',
    r'i\.imgflip\.com',
    r'farm2\.static\.flickr\.com',
    r'img-9gag-fun\.9cache\.com',
    r'i\.ytimg\.com',
    r'a57\.foxnews\.com',
    r'external-content\.duckduckgo\.com',
    r'blogs-images\.forbes\.com',
    r'images\.gr-assets\.com',
]
# Alternation of every approved host pattern, spliced into the regexes below.
hosts = "|".join(approved_embed_hosts)
# The patterns are raw f-strings so that regex escapes (\[, \/, \w, \-, \.) are not
# parsed as invalid Python string escapes; the resulting pattern text is unchanged.

# Markdown embed `![](target)` whose target does NOT start with `https://<approved host>/`;
# group 1 is the target.
image_check_regex = re.compile(rf'!\[\]\(((?!https:\/\/({hosts})\/).*?)\)', flags=re.A)
# https URL on an approved host, not directly preceded by `<code>`, `<pre>` or `<a>`.
embed_check_regex = regex.compile(rf'(?<!<(code|pre|a)>)https:\/\/({hosts})\/[\w:~,()\-.#&\/=?@%;+]*', flags=regex.A)
# Same as above, but the URL must end in .mp4/.webm/.mov; group 1 is the whole URL.
video_regex = regex.compile(rf'((?<!<(code|pre|a)>)https:\/\/({hosts})\/[\w:~,()\-.#&\/=?@%;+]*?\.(mp4|webm|mov))', flags=regex.A)

View File

@ -1,6 +1,6 @@
import bleach
from bs4 import BeautifulSoup
from bleach.linkifier import LinkifyFilter
from bleach.linkifier import LinkifyFilter, build_url_re
from functools import partial
from .get import *
from .patter import pat
@ -13,6 +13,59 @@ import signal
import time
import requests
# Top-level domains recognised when building the link-detection pattern.
TLDS = """
ac ad ae aero af ag ai al am an ao aq ar arpa as asia at au aw ax az
ba bb bd be bf bg bh bi biz bj bm bn bo br bs bt bv bw by bz
ca cafe cat cc cd cf cg ch ci ck cl club cm cn co com coop cr cu cv cx cy cz
de dj dk dm do dz
ec edu ee eg er es et eu
fi fj fk fm fo fr
ga gb gd ge gf gg gh gi gl gm gn gov gp gq gr gs gt gu gw gy
hk hm hn hr ht hu
id ie il im in info int io iq ir is it
je jm jo jobs jp
ke kg kh ki km kn kp kr kw ky kz
la lb lc li lk lr ls lt lu lv ly
ma mc md me mg mh mil mk ml mm mn mo mobi mp mq mr ms mt mu museum mv mw mx my mz
na name nc ne net nf ng ni nl no np nr nu nz
om org
pa pe pf pg ph pk pl pm pn post pr pro ps pt pw py
qa
re ro rs ru rw
sa sb sc sd se sg sh si sj sk sl sm sn so social sr ss st su sv sx sy sz
tc td tel tf tg th tj tk tl tm tn to tp tr travel tt tv tw tz
ua ug uk us uy uz
va vc ve vg vi vn vu
wf win ws
xn xxx
ye yt yu
za zm zw
""".split()

# HTML tags that survive sanitisation.
allowed_tags = [
    'b', 'blockquote', 'br', 'code', 'del', 'em',
    'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
    'hr', 'i', 'li', 'ol', 'p', 'pre', 'strong', 'sub', 'sup',
    'table', 'tbody', 'th', 'thead', 'td', 'tr', 'ul',
    'marquee', 'a', 'span', 'ruby', 'rp', 'rt', 'spoiler',
    'img', 'lite-youtube', 'video', 'source',
]
def _is_allowed_embed_src(value):
    """Return True if `value` is a site-relative path or a URL on an approved embed host."""
    if value.startswith('/'):
        # "//host/path" is a protocol-relative URL pointing at an external host,
        # so it must not be treated as a local path.
        return not value.startswith('//')
    return bool(embed_check_regex.fullmatch(value))


def allowed_attributes(tag, name, value):
    """Decide whether attribute `name` with `value` may stay on element `tag`.

    Intended as the `attributes` callable of the HTML cleaner.

    Args:
        tag: element name, e.g. 'img'.
        name: attribute name.
        value: attribute value as a string.

    Returns:
        True to keep the attribute, False to drop it.
    """
    if name == 'style':
        return True

    if tag == 'marquee':
        if name in ('direction', 'behavior', 'scrollamount'):
            return True
        if name in ('height', 'width'):
            try:
                size = int(value.replace('px', ''))
            except ValueError:
                # Non-numeric sizes are rejected rather than passed through.
                return False
            return 0 < size <= 250
        return False

    if tag == 'a':
        if name == 'href':
            return True
        if name == 'rel' and value == 'nofollow noopener noreferrer':
            return True
        if name == 'target' and value == '_blank':
            return True
        return False

    if tag == 'img':
        if name in ('src', 'data-src'):
            return _is_allowed_embed_src(value)
        if name == 'loading' and value == 'lazy':
            return True
        # 'referrerpolicy' is the real HTML attribute; the historical misspelling
        # 'referrpolicy' is still accepted so existing markup keeps passing.
        if name in ('referrpolicy', 'referrerpolicy') and value == 'no-referrer':
            return True
        if name == 'data-bs-toggle' and value == 'tooltip':
            return True
        return name in ('alt', 'title', 'g', 'b')

    if tag == 'lite-youtube':
        if name == 'params' and value.startswith('autoplay=1&modestbranding=1'):
            return True
        return name == 'videoid'

    if tag == 'video':
        if name == 'controls' and value == '':
            return True
        if name == 'preload' and value == 'none':
            return True
        return False

    if tag == 'source':
        # Only `src` is accepted, and it goes through the same host check as <img>;
        # accepting every attribute would let arbitrary hosts through.
        return name == 'src' and _is_allowed_embed_src(value)

    if tag == 'p':
        return name == 'class' and value == 'mb-0'

    return False
# URL-matching pattern produced by bleach's build_url_re, limited to the entries of TLDS
# and to the http/https protocols.
url_re = build_url_re(tlds=TLDS, protocols=['http', 'https'])
def callback(attrs, new=False):
href = attrs[(None, "href")]
@ -29,7 +82,7 @@ def handler(signum, frame):
raise Exception("Timeout")
def sanitize(sanitized, noimages=False, alert=False, comment=False, edit=False):
def sanitize(sanitized, alert=False, comment=False, edit=False):
signal.signal(signal.SIGALRM, handler)
signal.alarm(1)
@ -176,11 +229,7 @@ def sanitize(sanitized, noimages=False, alert=False, comment=False, edit=False):
sanitized = sanitized.replace(url, htmlsource)
sanitized = unlinked_regex.sub(r'\1<a href="\2" rel="nofollow noopener noreferrer" target="_blank">\2</a>', sanitized)
if not noimages:
sanitized = video_regex.sub(r'<p><video controls preload="none"><source src="\1"></video>', sanitized)
sanitized = video_regex.sub(r'<video controls preload="none"><source src="\1"></video>', sanitized)
if comment:
for marsey in g.db.query(Marsey).filter(Marsey.name.in_(marseys_used)).all():
@ -199,61 +248,12 @@ def sanitize(sanitized, noimages=False, alert=False, comment=False, edit=False):
sanitized = sanitized.replace('<html><body>','').replace('</body></html>','')
allowed_tags = ['b','blockquote','br','code','del','em','h1','h2','h3','h4','h5','h6','hr','i','li','ol','p','pre','strong','sub','sup','table','tbody','th','thead','td','tr','ul','marquee','a','span','ruby','rp','rt','spoiler','img','lite-youtube']
if not noimages: allowed_tags += ['video','source']
# Attribute filter: returns True when attribute `name` (with `value`) may stay on `tag`.
# NOTE(review): reads `noimages`, which is not one of this function's parameters — presumably
# the argument of the enclosing sanitize(); confirm against the signature in use.
def allowed_attributes(tag, name, value):
# `style` is accepted on every tag.
if name == 'style': return True
if tag == 'marquee':
if name in ['direction', 'behavior', 'scrollamount']: return True
if name in {'height', 'width'}:
# Sizes must parse as an integer (an optional 'px' is removed) and lie in 1..250.
try: value = int(value.replace('px', ''))
except: return False
if 0 < value <= 250: return True
return False
if tag == 'a':
if name == 'href': return True
if name == 'rel' and value == 'nofollow noopener noreferrer': return True
if name == 'target' and value == '_blank': return True
return False
if tag == 'img':
# When `noimages` is truthy, only src/data-src values starting with '/' are kept.
if name in ['src','data-src'] and not value.startswith('/') and noimages: return False
if name == 'loading' and value == 'lazy': return True
if name == 'referrpolicy' and value == 'no-referrer': return True
if name == 'data-bs-toggle' and value == 'tooltip': return True
if name in ['src','data-src','alt','title','g','b']: return True
return False
if tag == 'lite-youtube':
if name == 'params' and value.startswith('autoplay=1&modestbranding=1'): return True
if name == 'videoid': return True
return False
if tag == 'video':
if name == 'controls' and value == '': return True
if name == 'preload' and value == 'none': return True
return False
if tag == 'source':
if name == 'src': return True
return False
if tag == 'p':
if name == 'class' and value == 'mb-0': return True
return False
sanitized = bleach.Cleaner(tags=allowed_tags,
attributes=allowed_attributes,
protocols=['http', 'https'],
styles=['color', 'background-color', 'font-weight', 'text-align'],
filters=[partial(LinkifyFilter, skip_tags=["pre"], parse_email=False, callbacks=[callback])]
filters=[partial(LinkifyFilter, skip_tags=["pre"], parse_email=False, callbacks=[callback], url_re=url_re)]
).clean(sanitized)
@ -266,13 +266,13 @@ def sanitize(sanitized, noimages=False, alert=False, comment=False, edit=False):
# Attribute filter for emoji-only content: only <img> attributes can be kept.
# NOTE(review): two `def` headers appear back to back below (`allowed_attributes` and
# `allowed_attributes_emojis`); confirm which name the bleach.clean(...) call site passes.
def allowed_attributes(tag, name, value):
def allowed_attributes_emojis(tag, name, value):
if tag == 'img':
if name == 'loading' and value == 'lazy': return True
if name == 'data-bs-toggle' and value == 'tooltip': return True
# src/alt/title/g are accepted with any value.
if name in ['src','alt','title','g']: return True
return False
return False
def filter_emojis_only(title, edit=False, graceful=False):
@ -308,7 +308,7 @@ def filter_emojis_only(title, edit=False, graceful=False):
title = strikethrough_regex.sub(r'<del>\1</del>', title)
sanitized = bleach.clean(title, tags=['img','del'], attributes=allowed_attributes, protocols=['http','https'])
sanitized = bleach.clean(title, tags=['img','del'], attributes=allowed_attributes_emojis, protocols=['http','https'])
signal.alarm(0)

View File

@ -541,7 +541,7 @@ def change_settings(v, setting):
body = f"@{v.username} has {word}d `{setting}` in the [admin dashboard](/admin)!"
body_html = sanitize(body, noimages=True)
body_html = sanitize(body)
new_comment = Comment(author_id=NOTIFICATIONS_ID,
parent_submission=None,

View File

@ -54,7 +54,7 @@ def request_api_keys(v):
body = f"@{v.username} has requested API keys for `{request.values.get('name')}`. You can approve or deny the request [here](/admin/apps)."
body_html = sanitize(body, noimages=True)
body_html = sanitize(body)
new_comment = Comment(author_id=NOTIFICATIONS_ID,

View File

@ -727,7 +727,7 @@ def thumbnail_thread(pid):
if i["subreddit"] == 'PokemonGoRaids': continue
body_html = sanitize(f'New site mention: https://old.reddit.com{i["permalink"]}?context=89', noimages=True)
body_html = sanitize(f'New site mention: https://old.reddit.com{i["permalink"]}?context=89')
existing_comment = db.query(Comment.id).filter_by(author_id=NOTIFICATIONS_ID, parent_submission=None, body_html=body_html).one_or_none()
if existing_comment: break
@ -755,7 +755,7 @@ def thumbnail_thread(pid):
except: break
for i in data:
body_html = sanitize(f'New mention of you: https://old.reddit.com{i["permalink"]}?context=89', noimages=True)
body_html = sanitize(f'New mention of you: https://old.reddit.com{i["permalink"]}?context=89')
existing_comment = db.query(Comment.id).filter_by(author_id=NOTIFICATIONS_ID, parent_submission=None,body_html=body_html).one_or_none()
if existing_comment: break
@ -784,7 +784,7 @@ def thumbnail_thread(pid):
except: break
for i in data:
body_html = sanitize(f'New site mention: https://old.reddit.com{i["permalink"]}?context=89', noimages=True)
body_html = sanitize(f'New site mention: https://old.reddit.com{i["permalink"]}?context=89')
existing_comment = db.query(Comment.id).filter_by(author_id=NOTIFICATIONS_ID, parent_submission=None, body_html=body_html).one_or_none()
@ -960,7 +960,7 @@ def submit_post(v, sub=None):
Submission.deleted_utc == 0,
Submission.is_banned == False
).first()
if repost: return redirect(repost.permalink)
if repost and SITE != 'localhost': return redirect(repost.permalink)
domain_obj = get_domain(domain)
if not domain_obj: domain_obj = get_domain(domain+parsed_url.path)
@ -1012,7 +1012,7 @@ def submit_post(v, sub=None):
Submission.body == body
).one_or_none()
if dup: return redirect(dup.permalink)
if dup and SITE != 'localhost': return redirect(dup.permalink)
now = int(time.time())
cutoff = now - 60 * 60 * 24

View File

@ -301,7 +301,7 @@ def submit_contact(v):
if not body: abort(400)
body = f'This message has been sent automatically to all admins via [/contact](/contact)\n\nMessage:\n\n' + body
body_html = sanitize(body, noimages=True)
body_html = sanitize(body)
if request.files.get("file") and request.headers.get("cf-ipcountry") != "T1":
file=request.files["file"]

View File

@ -606,9 +606,7 @@ def message2(v, username):
if 'linkedin.com' in message: return {"error": "This domain 'linkedin.com' is banned."}, 403
message = embed_removing_regex.sub(r'\1', message)
body_html = sanitize(message, noimages=True)
body_html = sanitize(message)
existing = g.db.query(Comment.id).filter(Comment.author_id == v.id,
Comment.sentto == user.id,
@ -666,8 +664,6 @@ def messagereply(v):
if 'linkedin.com' in message: return {"error": "this domain 'linkedin.com' is banned"}
message = embed_removing_regex.sub(r'\1', message)
id = int(request.values.get("parent_id"))
parent = get_comment(id, v=v)
user_id = parent.author.id
@ -675,7 +671,7 @@ def messagereply(v):
if parent.sentto == 2: user_id = None
elif v.id == user_id: user_id = parent.sentto
body_html = sanitize(message, noimages=True)
body_html = sanitize(message)
if request.files.get("file") and request.headers.get("cf-ipcountry") != "T1":
file=request.files["file"]