diff --git a/.gitignore b/.gitignore
index 5b21b7455..1ce8ce8ff 100644
--- a/.gitignore
+++ b/.gitignore
@@ -10,4 +10,6 @@ venv/
.sass-cache/
flask_session/
.DS_Store
-site_settings.json
\ No newline at end of file
+site_settings.json
+/files/test.py
+tags
diff --git a/files/helpers/sanitize.py b/files/helpers/sanitize.py
index 28b03a108..2f14b084c 100644
--- a/files/helpers/sanitize.py
+++ b/files/helpers/sanitize.py
@@ -1,3 +1,4 @@
+import functools
import bleach
from bs4 import BeautifulSoup
from bleach.css_sanitizer import CSSSanitizer
@@ -53,7 +54,7 @@ def allowed_attributes(tag, name, value):
except: return False
if 0 < value <= 250: return True
return False
-
+
if tag == 'a':
if name == 'href' and '\\' not in value and 'xn--' not in value:
return True
@@ -121,10 +122,6 @@ def callback(attrs, new=False):
return attrs
-def handler(signum, frame):
- print("Timeout!", flush=True)
- raise Exception("Timeout")
-
def render_emoji(html, regexp, edit, marseys_used, b=False):
emojis = list(regexp.finditer(html))
captured = set()
@@ -164,11 +161,31 @@ def render_emoji(html, regexp, edit, marseys_used, b=False):
return html
+def with_sigalrm_timeout(timeout: int):
+ """Build a decorator that aborts the wrapped call after ``timeout`` seconds.
+
+ Each call to the decorated function installs a SIGALRM handler, arms
+ ``signal.alarm(timeout)``, runs the function, and disarms the alarm in a
+ ``finally`` block, so the alarm is cleared whether the function returns
+ or raises. If the alarm fires first, the handler prints "Timeout!" and
+ raises ``Exception("Timeout")``.
+
+ The previous SIGALRM handler and any alarm that was already pending are
+ neither saved nor restored, so nesting decorated calls leaves the outer
+ call without a timeout once the inner one finishes.
+ """
+
+ # NOTE: testing this with time.sleep showed that gunicorn does some async work of
+ # its own; if the alarm fires during that (or during a db op), the process crashes
+ # instead of returning a proper 500 error. Known limitation.
+ def sig_handler(signum, frame):
+ # Runs when SIGALRM is delivered: log, then abort whatever code was executing.
+ print("Timeout!", flush=True)
+ raise Exception("Timeout")
+
+ def inner(func):
+ # functools.wraps keeps the decorated function's name and docstring.
+ @functools.wraps(func)
+ def wrapped(*args, **kwargs):
+ # The handler is installed first, then the alarm is armed.
+ signal.signal(signal.SIGALRM, sig_handler)
+ signal.alarm(timeout)
+ try:
+ return func(*args, **kwargs)
+ finally:
+ # Disarm on every exit path (normal return or exception).
+ signal.alarm(0)
+ return wrapped
+ return inner
+
+
+@with_sigalrm_timeout(2)
def sanitize(sanitized, edit=False):
-
- signal.signal(signal.SIGALRM, handler)
- signal.alarm(2)
-
sanitized = sanitized.strip()
sanitized = normalize_url(sanitized)
@@ -232,9 +249,9 @@ def sanitize(sanitized, edit=False):
sanitized = str(soup)
-
+
sanitized = spoiler_regex.sub(r'\1', sanitized)
-
+
marseys_used = set()
emojis = list(emoji_regex.finditer(sanitized))
@@ -311,7 +328,7 @@ def sanitize(sanitized, edit=False):
attributes=allowed_attributes,
protocols=['http', 'https'],
css_sanitizer=css_sanitizer,
- filters=[partial(LinkifyFilter, skip_tags=["pre"],
+ filters=[partial(LinkifyFilter, skip_tags=["pre"],
parse_email=False, callbacks=[callback], url_re=url_re)]
).clean(sanitized)
@@ -325,7 +342,7 @@ def sanitize(sanitized, edit=False):
href = link.get("href")
if not href: continue
-
+
url = urlparse(href)
domain = url.netloc
url_path = url.path
@@ -348,8 +365,6 @@ def sanitize(sanitized, edit=False):
if len(sanitized) > 5000:
sanitized = showmore_regex.sub(r'\1
\2
', sanitized)
- signal.alarm(0)
-
return sanitized.strip()
@@ -373,11 +388,9 @@ def allowed_attributes_emojis(tag, name, value):
return False
+@with_sigalrm_timeout(1)
def filter_emojis_only(title, edit=False, graceful=False):
- signal.signal(signal.SIGALRM, handler)
- signal.alarm(1)
-
title = title.replace('','').replace('','').replace("\ufeff", "").replace("𒐪","").replace("\n", "").replace("\r", "").replace("\t", "").replace("&", "&").replace('<','<').replace('>','>').replace('"', '"').replace("'", "'").strip()
marseys_used = set()
@@ -393,8 +406,6 @@ def filter_emojis_only(title, edit=False, graceful=False):
title = bleach.clean(title, tags=['img','del','span'], attributes=allowed_attributes_emojis, protocols=['http','https'])
- signal.alarm(0)
-
if len(title) > 1500 and not graceful: abort(400)
else: return title.replace('\n','').strip()
diff --git a/files/routes/search.py b/files/routes/search.py
index a72dbd264..c7f409956 100644
--- a/files/routes/search.py
+++ b/files/routes/search.py
@@ -19,22 +19,22 @@ def searchparse(text):
text = text.lower()
criteria = {x[0]:x[1] for x in query_regex.findall(text)}
-
for x in criteria:
if x in valid_params:
text = text.replace(f"{x}:{criteria[x]}", "")
- text=text.strip()
-
+ text = text.strip()
+ # A search token is either a double-quoted phrase (group 1) or a run of
+ # non-whitespace characters (group 2). Raw string: '\S' is not a valid str escape.
+ re_search_token = re.compile(r'"([^"]*)"|(\S+)')
if text:
- criteria['q']=text
+ criteria['q'] = []
+ for m in re_search_token.finditer(text):
+ # Compare against None rather than truthiness: an empty quoted phrase ("")
+ # matches group 1 as '', and falling through to the unmatched group 2 would
+ # yield None and crash on .replace() below.
+ token = m[1] if m[1] is not None else m[2]
+ # Escape SQL pattern matching special characters
+ token = token.replace('\\', '').replace('_', r'\_').replace('%', r'\%')
+ criteria['q'].append(token)
return criteria
-
-
-
-
@app.get("/search/posts")
@auth_required
def searchposts(v):
@@ -50,15 +50,6 @@ def searchposts(v):
-
-
-
-
-
-
-
-
-
posts = g.db.query(Submission.id).filter(Submission.author_id.notin_(v.userblocks))
if not v.paid_dues: posts = posts.filter_by(club=False)
@@ -66,7 +57,7 @@ def searchposts(v):
if v.admin_level < 2:
posts = posts.filter(Submission.deleted_utc == 0, Submission.is_banned == False, Submission.private == False)
-
+
if 'author' in criteria:
posts = posts.filter(Submission.ghost == False)
@@ -91,9 +82,8 @@ def searchposts(v):
else: posts = posts.filter(Submission.author_id == author.id)
if 'q' in criteria:
- words=criteria['q'].split()
- words = criteria['q'].replace('\\', '').replace('_', '\_').replace('%', '\%').strip().split()
- words = [or_(Submission.title.ilike('%'+x+'%'), Submission.body.ilike('%'+x+'%')) for x in words]
+ words = [or_(Submission.title.ilike('%'+x+'%'), Submission.body.ilike('%'+x+'%')) \
+ for x in criteria['q']]
posts = posts.filter(*words)
if 'over18' in criteria: posts = posts.filter(Submission.over_18==True)
@@ -206,9 +196,7 @@ def searchcomments(v):
else: comments = comments.filter(Comment.author_id == author.id)
if 'q' in criteria:
- words = criteria['q'].replace('\\', '').replace('_', '\_').replace('%', '\%').strip().split()
-
- words = [Comment.body.ilike('%'+x+'%') for x in words]
+ words = [Comment.body.ilike('%'+x+'%') for x in criteria['q']]
comments = comments.filter(*words)
if 'over18' in criteria: comments = comments.filter(Comment.over_18 == True)
diff --git a/files/templates/contact.html b/files/templates/contact.html
index f94b0ee70..daada1908 100644
--- a/files/templates/contact.html
+++ b/files/templates/contact.html
@@ -23,9 +23,6 @@
Use this form to contact {{SITE_NAME}} Admins.
-
-
-