diff --git a/.gitignore b/.gitignore index 5b21b74551..1ce8ce8ffd 100644 --- a/.gitignore +++ b/.gitignore @@ -10,4 +10,6 @@ venv/ .sass-cache/ flask_session/ .DS_Store -site_settings.json \ No newline at end of file +site_settings.json +/files/test.py +tags diff --git a/files/helpers/sanitize.py b/files/helpers/sanitize.py index 28b03a1089..2f14b084ca 100644 --- a/files/helpers/sanitize.py +++ b/files/helpers/sanitize.py @@ -1,3 +1,4 @@ +import functools import bleach from bs4 import BeautifulSoup from bleach.css_sanitizer import CSSSanitizer @@ -53,7 +54,7 @@ def allowed_attributes(tag, name, value): except: return False if 0 < value <= 250: return True return False - + if tag == 'a': if name == 'href' and '\\' not in value and 'xn--' not in value: return True @@ -121,10 +122,6 @@ def callback(attrs, new=False): return attrs -def handler(signum, frame): - print("Timeout!", flush=True) - raise Exception("Timeout") - def render_emoji(html, regexp, edit, marseys_used, b=False): emojis = list(regexp.finditer(html)) captured = set() @@ -164,11 +161,31 @@ def render_emoji(html, regexp, edit, marseys_used, b=False): return html +def with_sigalrm_timeout(timeout: int): + 'Use SIGALRM to raise an exception if the function executes for longer than timeout seconds' + + # while trying to test this using time.sleep I discovered that gunicorn does in fact do some + # async so if we timeout on that (or on a db op) then the process is crashed without returning + # a proper 500 error. Oh well. + def sig_handler(signum, frame): + print("Timeout!", flush=True) + raise Exception("Timeout") + + def inner(func): + @functools.wraps(func) + def wrapped(*args, **kwargs): + signal.signal(signal.SIGALRM, sig_handler) + signal.alarm(timeout) + try: + return func(*args, **kwargs) + finally: + signal.alarm(0) + return wrapped + return inner + + +@with_sigalrm_timeout(2) def sanitize(sanitized, edit=False): - - signal.signal(signal.SIGALRM, handler) - signal.alarm(2) - sanitized = sanitized.strip() sanitized = normalize_url(sanitized) @@ -232,9 +249,9 @@ def sanitize(sanitized, edit=False): sanitized = str(soup) - + sanitized = spoiler_regex.sub(r'\1', sanitized) - + marseys_used = set() emojis = list(emoji_regex.finditer(sanitized)) @@ -311,7 +328,7 @@ def sanitize(sanitized, edit=False): attributes=allowed_attributes, protocols=['http', 'https'], css_sanitizer=css_sanitizer, - filters=[partial(LinkifyFilter, skip_tags=["pre"], + filters=[partial(LinkifyFilter, skip_tags=["pre"], parse_email=False, callbacks=[callback], url_re=url_re)] ).clean(sanitized) @@ -325,7 +342,7 @@ def sanitize(sanitized, edit=False): href = link.get("href") if not href: continue - + url = urlparse(href) domain = url.netloc url_path = url.path @@ -348,8 +365,6 @@ def sanitize(sanitized, edit=False): if len(sanitized) > 5000: sanitized = showmore_regex.sub(r'\1

\2
', sanitized) - signal.alarm(0) - return sanitized.strip() @@ -373,11 +388,9 @@ def allowed_attributes_emojis(tag, name, value): return False +@with_sigalrm_timeout(1) def filter_emojis_only(title, edit=False, graceful=False): - signal.signal(signal.SIGALRM, handler) - signal.alarm(1) - title = title.replace('‎','').replace('​','').replace("\ufeff", "").replace("𒐪","").replace("\n", "").replace("\r", "").replace("\t", "").replace("&", "&").replace('<','<').replace('>','>').replace('"', '"').replace("'", "'").strip() marseys_used = set() @@ -393,8 +406,6 @@ def filter_emojis_only(title, edit=False, graceful=False): title = bleach.clean(title, tags=['img','del','span'], attributes=allowed_attributes_emojis, protocols=['http','https']) - signal.alarm(0) - if len(title) > 1500 and not graceful: abort(400) else: return title.replace('\n','').strip() diff --git a/files/routes/search.py b/files/routes/search.py index a72dbd2640..c7f4099562 100644 --- a/files/routes/search.py +++ b/files/routes/search.py @@ -19,22 +19,22 @@ def searchparse(text): text = text.lower() criteria = {x[0]:x[1] for x in query_regex.findall(text)} - for x in criteria: if x in valid_params: text = text.replace(f"{x}:{criteria[x]}", "") - text=text.strip() - + text = text.strip() + re_search_token = re.compile('"([^"]*)"|(\S+)') if text: - criteria['q']=text + criteria['q'] = [] + for m in re_search_token.finditer(text): + token = m[1] if m[1] else m[2] + # Escape SQL pattern matching special characters + token = token.replace('\\', '').replace('_', '\_').replace('%', '\%') + criteria['q'].append(token) return criteria - - - - @app.get("/search/posts") @auth_required def searchposts(v): @@ -50,15 +50,6 @@ def searchposts(v): - - - - - - - - - posts = g.db.query(Submission.id).filter(Submission.author_id.notin_(v.userblocks)) if not v.paid_dues: posts = posts.filter_by(club=False) @@ -66,7 +57,7 @@ def searchposts(v): if v.admin_level < 2: posts = posts.filter(Submission.deleted_utc == 0, Submission.is_banned == False, Submission.private == False) - + if 'author' in criteria: posts = posts.filter(Submission.ghost == False) @@ -91,9 +82,8 @@ def searchposts(v): else: posts = posts.filter(Submission.author_id == author.id) if 'q' in criteria: - words=criteria['q'].split() - words = criteria['q'].replace('\\', '').replace('_', '\_').replace('%', '\%').strip().split() - words = [or_(Submission.title.ilike('%'+x+'%'), Submission.body.ilike('%'+x+'%')) for x in words] + words = [or_(Submission.title.ilike('%'+x+'%'), Submission.body.ilike('%'+x+'%')) \ + for x in criteria['q']] posts = posts.filter(*words) if 'over18' in criteria: posts = posts.filter(Submission.over_18==True) @@ -206,9 +196,7 @@ def searchcomments(v): else: comments = comments.filter(Comment.author_id == author.id) if 'q' in criteria: - words = criteria['q'].replace('\\', '').replace('_', '\_').replace('%', '\%').strip().split() - - words = [Comment.body.ilike('%'+x+'%') for x in words] + words = [Comment.body.ilike('%'+x+'%') for x in criteria['q']] comments = comments.filter(*words) if 'over18' in criteria: comments = comments.filter(Comment.over_18 == True) diff --git a/files/templates/contact.html b/files/templates/contact.html index f94b0ee702..daada19082 100644 --- a/files/templates/contact.html +++ b/files/templates/contact.html @@ -23,9 +23,6 @@

Use this form to contact {{SITE_NAME}} Admins.

- - -
diff --git a/files/templates/util/assetcache.html b/files/templates/util/assetcache.html index ad7f89acbc..c9dda2f25f 100644 --- a/files/templates/util/assetcache.html +++ b/files/templates/util/assetcache.html @@ -35,7 +35,7 @@ set CACHE_VER = { set CACHE_VER_SITEIMG = { 'rDrama': 2000, 'FunOnly': 2000, - 'PCM': 2000, + 'PCM': 2002, 'Cringetopia': 2000, 'WPD': 2000, 'LGBDropTheT': 2010, @@ -47,5 +47,5 @@ set CACHE_VER_SITEIMG = { {%- endmacro -%} {%- macro asset_siteimg(name) -%} -/i/{{SITE_NAME}}/{{name}}?v={{ CACHE_VER_SITEIMG[name] }} +/i/{{SITE_NAME}}/{{name}}?v={{ CACHE_VER_SITEIMG[SITE_NAME] }} {%- endmacro -%} diff --git a/schema.sql b/schema.sql index 0e9d911f4d..f4bd6835ee 100644 --- a/schema.sql +++ b/schema.sql @@ -2,7 +2,7 @@ -- PostgreSQL database dump -- --- Dumped from database version 13.7 +-- Dumped from database version 14.4 -- Dumped by pg_dump version 14.2 (Ubuntu 14.2-1.pgdg20.04+1) SET statement_timeout = 0; @@ -2226,7 +2226,7 @@ ALTER TABLE ONLY public.comment_option_votes -- PostgreSQL database dump -- --- Dumped from database version 13.7 +-- Dumped from database version 14.4 -- Dumped by pg_dump version 14.2 (Ubuntu 14.2-1.pgdg20.04+1) SET statement_timeout = 0;