fix exceptions in sanitize leaving SIGALRM on

remotes/1693045480750635534/spooky-22
official-techsupport 2022-07-06 01:11:45 +03:00 committed by TLSM
parent e5a1d4205f
commit 548030fcf1
2 changed files with 34 additions and 21 deletions

4
.gitignore vendored
View File

@ -10,4 +10,6 @@ venv/
.sass-cache/ .sass-cache/
flask_session/ flask_session/
.DS_Store .DS_Store
site_settings.json site_settings.json
/files/test.py
tags

View File

@ -1,3 +1,4 @@
import functools
import bleach import bleach
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
from bleach.css_sanitizer import CSSSanitizer from bleach.css_sanitizer import CSSSanitizer
@ -53,7 +54,7 @@ def allowed_attributes(tag, name, value):
except: return False except: return False
if 0 < value <= 250: return True if 0 < value <= 250: return True
return False return False
if tag == 'a': if tag == 'a':
if name == 'href' and '\\' not in value and 'xn--' not in value: if name == 'href' and '\\' not in value and 'xn--' not in value:
return True return True
@ -121,10 +122,6 @@ def callback(attrs, new=False):
return attrs return attrs
def handler(signum, frame):
    """SIGALRM handler: report the timeout on stdout, then raise to abort the interrupted code.

    Both arguments are supplied by the signal machinery and are not used here.
    """
    print("Timeout!", flush=True)
    raise Exception("Timeout")
def render_emoji(html, regexp, edit, marseys_used, b=False): def render_emoji(html, regexp, edit, marseys_used, b=False):
emojis = list(regexp.finditer(html)) emojis = list(regexp.finditer(html))
captured = set() captured = set()
@ -164,11 +161,31 @@ def render_emoji(html, regexp, edit, marseys_used, b=False):
return html return html
def with_sigalrm_timeout(timeout: int):
    """Build a decorator that aborts the wrapped call after ``timeout`` seconds.

    The decorator arms ``SIGALRM`` before calling the wrapped function and
    always disarms it afterwards, whether the function returns or raises.
    If the alarm fires first, the handler prints ``Timeout!`` and raises
    ``Exception("Timeout")`` inside the running code.

    Args:
        timeout: Whole seconds to allow; passed straight to ``signal.alarm``.

    Returns:
        A decorator that preserves the wrapped function's name and docstring.

    Note:
        ``signal.signal`` may only be called from the main thread, so the
        decorated function must run there.
    """
    # Author's caveat from testing with time.sleep: gunicorn does some async
    # work, so if the alarm fires during that (or during a db operation) the
    # process crashes instead of returning a proper 500 error.
    def sig_handler(signum, frame):
        print("Timeout!", flush=True)
        raise Exception("Timeout")

    def inner(func):
        # Copy metadata from the decorated function itself, not from `inner`,
        # so __name__/__doc__/__wrapped__ describe the real callee.
        @functools.wraps(func)
        def wrapped(*args, **kwargs):
            previous = signal.signal(signal.SIGALRM, sig_handler)
            signal.alarm(timeout)
            try:
                return func(*args, **kwargs)
            finally:
                # Disarm first so no alarm can fire while handlers are swapped.
                signal.alarm(0)
                # signal.signal() returns None when the old handler was not
                # installed from Python; that value cannot be re-installed.
                if previous is not None:
                    signal.signal(signal.SIGALRM, previous)
        return wrapped
    return inner
@with_sigalrm_timeout(2)
def sanitize(sanitized, edit=False): def sanitize(sanitized, edit=False):
signal.signal(signal.SIGALRM, handler)
signal.alarm(2)
sanitized = sanitized.strip() sanitized = sanitized.strip()
sanitized = normalize_url(sanitized) sanitized = normalize_url(sanitized)
@ -232,9 +249,9 @@ def sanitize(sanitized, edit=False):
sanitized = str(soup) sanitized = str(soup)
sanitized = spoiler_regex.sub(r'<spoiler>\1</spoiler>', sanitized) sanitized = spoiler_regex.sub(r'<spoiler>\1</spoiler>', sanitized)
marseys_used = set() marseys_used = set()
emojis = list(emoji_regex.finditer(sanitized)) emojis = list(emoji_regex.finditer(sanitized))
@ -311,7 +328,7 @@ def sanitize(sanitized, edit=False):
attributes=allowed_attributes, attributes=allowed_attributes,
protocols=['http', 'https'], protocols=['http', 'https'],
css_sanitizer=css_sanitizer, css_sanitizer=css_sanitizer,
filters=[partial(LinkifyFilter, skip_tags=["pre"], filters=[partial(LinkifyFilter, skip_tags=["pre"],
parse_email=False, callbacks=[callback], url_re=url_re)] parse_email=False, callbacks=[callback], url_re=url_re)]
).clean(sanitized) ).clean(sanitized)
@ -325,7 +342,7 @@ def sanitize(sanitized, edit=False):
href = link.get("href") href = link.get("href")
if not href: continue if not href: continue
url = urlparse(href) url = urlparse(href)
domain = url.netloc domain = url.netloc
url_path = url.path url_path = url.path
@ -348,8 +365,6 @@ def sanitize(sanitized, edit=False):
if len(sanitized) > 5000: if len(sanitized) > 5000:
sanitized = showmore_regex.sub(r'\1<p><button class="btn btn-primary" onclick="showmore()">SHOW MORE</button></p><div class="d-none">\2</div>', sanitized) sanitized = showmore_regex.sub(r'\1<p><button class="btn btn-primary" onclick="showmore()">SHOW MORE</button></p><div class="d-none">\2</div>', sanitized)
signal.alarm(0)
return sanitized.strip() return sanitized.strip()
@ -373,11 +388,9 @@ def allowed_attributes_emojis(tag, name, value):
return False return False
@with_sigalrm_timeout(1)
def filter_emojis_only(title, edit=False, graceful=False): def filter_emojis_only(title, edit=False, graceful=False):
signal.signal(signal.SIGALRM, handler)
signal.alarm(1)
title = title.replace('','').replace('','').replace("\ufeff", "").replace("𒐪","").replace("\n", "").replace("\r", "").replace("\t", "").replace("&", "&amp;").replace('<','&lt;').replace('>','&gt;').replace('"', '&quot;').replace("'", "&#039;").strip() title = title.replace('','').replace('','').replace("\ufeff", "").replace("𒐪","").replace("\n", "").replace("\r", "").replace("\t", "").replace("&", "&amp;").replace('<','&lt;').replace('>','&gt;').replace('"', '&quot;').replace("'", "&#039;").strip()
marseys_used = set() marseys_used = set()
@ -393,8 +406,6 @@ def filter_emojis_only(title, edit=False, graceful=False):
title = bleach.clean(title, tags=['img','del','span'], attributes=allowed_attributes_emojis, protocols=['http','https']) title = bleach.clean(title, tags=['img','del','span'], attributes=allowed_attributes_emojis, protocols=['http','https'])
signal.alarm(0)
if len(title) > 1500 and not graceful: abort(400) if len(title) > 1500 and not graceful: abort(400)
else: return title.replace('\n','').strip() else: return title.replace('\n','').strip()