From ab2464e83461912109ef4d8d5d6040036ae2bdfb Mon Sep 17 00:00:00 2001 From: Aevann Date: Mon, 12 Feb 2024 09:14:19 +0200 Subject: [PATCH] #bleached --- files/classes/comment.py | 5 +- files/classes/post.py | 3 ++ files/helpers/bleach_body.py | 92 ++++++++++++++++++++++++++++++++++++ files/helpers/sanitize.py | 81 ++----------------------------- 4 files changed, 103 insertions(+), 78 deletions(-) create mode 100644 files/helpers/bleach_body.py diff --git a/files/classes/comment.py b/files/classes/comment.py index 8a9257a1f..d99ec794d 100644 --- a/files/classes/comment.py +++ b/files/classes/comment.py @@ -17,6 +17,7 @@ from files.helpers.slurs_and_profanities import * from files.helpers.lazy import lazy from files.helpers.regex import * from files.helpers.sorting_and_time import * +from files.helpers.bleach_body import * from .saves import CommentSaveRelationship @@ -154,7 +155,7 @@ def add_options(self, body, v): option_body += f'''>''' + option_body += f'"> - {o.upvotes} votes''' if o.exclusive > 1: s = '##' elif o.exclusive: s = '&&' @@ -425,6 +426,8 @@ class Comment(Base): body = normalize_urls_runtime(body, v) + body = bleach_body_html(body, runtime=True) + return body @lazy diff --git a/files/classes/post.py b/files/classes/post.py index 9d981f477..5de45210d 100644 --- a/files/classes/post.py +++ b/files/classes/post.py @@ -14,6 +14,7 @@ from files.helpers.slurs_and_profanities import * from files.helpers.lazy import lazy from files.helpers.regex import * from files.helpers.sorting_and_time import make_age_string +from files.helpers.bleach_body import * from .comment import * from .polls import * @@ -324,6 +325,8 @@ class Post(Base): body = normalize_urls_runtime(body, v) + body = bleach_body_html(body, runtime=True) + return body @lazy diff --git a/files/helpers/bleach_body.py b/files/helpers/bleach_body.py new file mode 100644 index 000000000..68dbea21a --- /dev/null +++ b/files/helpers/bleach_body.py @@ -0,0 +1,92 @@ +import bleach +from bleach.css_sanitizer import CSSSanitizer +from bleach.linkifier import LinkifyFilter +from functools import partial + +from files.helpers.regex import sanitize_url_regex + +allowed_tags = ('a','audio','b','big','blink','blockquote','br','center','code','del','details','em','g','gl','h1','h2','h3','h4','h5','h6','hr','i','img','li','lite-youtube','marquee','ol','p','pre','rp','rt','ruby','small','span','spoiler','strike','strong','sub','summary','sup','table','tbody','td','th','thead','tr','u','ul','video') +allowed_tags_runstime = ('div', 'input', 'label', 'score', 'button') +allowed_css_properties = ('background-color', 'color', 'filter', 'font-weight', 'text-align', 'transform') + +def allowed_attributes(tag, name, value): + + if name == 'style': + value = value.lower() + if 'transform' in value and 'scale' in value and ('rotate' in value or 'skew' in value): + return False + return True + + if tag == 'marquee': + if name in {'direction', 'behavior', 'scrollamount'}: return True + if name in {'height', 'width'}: + try: value = int(value.replace('px', '')) + except: return False + if 0 < value <= 250: return True + + if tag == 'a': + if name == 'href' and '\\' not in value and 'xn--' not in value: + return True + if name == 'rel' and value == 'nofollow noopener': return True + if name == 'target' and value == '_blank': return True + + if tag == 'img': + if name in {'src','data-src'}: return is_safe_url(value) + if name == 'loading' and value == 'lazy': return True + if name == 'data-bs-toggle' and value == 'tooltip': return True + if name in {'g','b','glow','party'} and not value: return True + if name in {'alt','title'}: return True + if name == 'class' and value == 'img': return True + if name == 'data-user-submitted' and not value: return True + + if tag == 'lite-youtube': + if name == 'params' and value.startswith('autoplay=1&modestbranding=1'): return True + if name == 'videoid': return True + + if tag == 'video': + if name == 'controls' and value == '': return True + if name == 'preload' and value == 'none': return True + if name == 'src': return is_safe_url(value) + + if tag == 'audio': + if name == 'src': return is_safe_url(value) + if name == 'controls' and value == '': return True + if name == 'preload' and value == 'none': return True + + if tag == 'p': + if name == 'class' and value in {'mb-0','resizable','yt','text-center'}: return True + + if tag == 'span': + if name == 'data-bs-toggle' and value == 'tooltip': return True + if name == 'title': return True + if name == 'alt': return True + if name == 'cide' and not value: return True + if name == 'bounce' and not value: return True + if name == 'id': return True + + if tag == 'table': + if name == 'class' and value == 'table': return True + + if tag in allowed_tags_runstime: + return True + + return False + + +def bleach_body_html(body_html, runtime=False): + css_sanitizer = CSSSanitizer(allowed_css_properties=allowed_css_properties) + + tags = allowed_tags + if runtime: + tags += allowed_tags_runstime + + body_html = bleach.Cleaner( + tags=tags, + attributes=allowed_attributes, + protocols=['http', 'https'], + css_sanitizer=css_sanitizer, + filters=[partial(LinkifyFilter, skip_tags=["pre"], + parse_email=False, url_re=sanitize_url_regex)] + ).clean(body_html) + + return body_html diff --git a/files/helpers/sanitize.py b/files/helpers/sanitize.py index 62da5df83..0434907d1 100644 --- a/files/helpers/sanitize.py +++ b/files/helpers/sanitize.py @@ -2,7 +2,6 @@ import functools import random import re import signal -from functools import partial from os import path, listdir from urllib.parse import parse_qs, urlparse, unquote, ParseResult, urlencode, urlunparse import time @@ -30,70 +29,7 @@ from files.helpers.marsify import * from files.helpers.owoify import * from files.helpers.sharpen import * from files.helpers.queenify import * - - -allowed_tags = ('a','audio','b','big','blink','blockquote','br','center','code','del','details','em','g','gl','h1','h2','h3','h4','h5','h6','hr','i','img','li','lite-youtube','marquee','ol','p','pre','rp','rt','ruby','small','span','spoiler','strike','strong','sub','summary','sup','table','tbody','td','th','thead','tr','u','ul','video') - -allowed_styles = ['background-color', 'color', 'filter', 'font-weight', 'text-align', 'transform'] - -def allowed_attributes(tag, name, value): - - if name == 'style': - value = value.lower() - if 'transform' in value and 'scale' in value and ('rotate' in value or 'skew' in value): - return False - return True - - if tag == 'marquee': - if name in {'direction', 'behavior', 'scrollamount'}: return True - if name in {'height', 'width'}: - try: value = int(value.replace('px', '')) - except: return False - if 0 < value <= 250: return True - - if tag == 'a': - if name == 'href' and '\\' not in value and 'xn--' not in value: - return True - if name == 'rel' and value == 'nofollow noopener': return True - if name == 'target' and value == '_blank': return True - - if tag == 'img': - if name in {'src','data-src'}: return is_safe_url(value) - if name == 'loading' and value == 'lazy': return True - if name == 'data-bs-toggle' and value == 'tooltip': return True - if name in {'g','b','glow','party'} and not value: return True - if name in {'alt','title'}: return True - if name == 'class' and value == 'img': return True - if name == 'data-user-submitted' and not value: return True - - if tag == 'lite-youtube': - if name == 'params' and value.startswith('autoplay=1&modestbranding=1'): return True - if name == 'videoid': return True - - if tag == 'video': - if name == 'controls' and value == '': return True - if name == 'preload' and value == 'none': return True - if name == 'src': return is_safe_url(value) - - if tag == 'audio': - if name == 'src': return is_safe_url(value) - if name == 'controls' and value == '': return True - if name == 'preload' and value == 'none': return True - - if tag == 'p': - if name == 'class' and value in {'mb-0','resizable','yt','text-center'}: return True - - if tag == 'span': - if name == 'data-bs-toggle' and value == 'tooltip': return True - if name == 'title': return True - if name == 'alt': return True - if name == 'cide' and not value: return True - if name == 'bounce' and not value: return True - - if tag == 'table': - if name == 'class' and value == 'table': return True - - return False +from files.helpers.bleach_body import * def create_comment_duplicated(text_html): new_comment = Comment(author_id=AUTOJANNY_ID, @@ -556,18 +492,7 @@ def sanitize(sanitized, golden=True, limit_pings=0, showmore=False, count_emojis sanitized = sanitized.replace('

', '') - allowed_css_properties = allowed_styles.copy() - if v and v.chud: - allowed_css_properties.remove('filter') - - css_sanitizer = CSSSanitizer(allowed_css_properties=allowed_css_properties) - sanitized = bleach.Cleaner(tags=allowed_tags, - attributes=allowed_attributes, - protocols=['http', 'https'], - css_sanitizer=css_sanitizer, - filters=[partial(LinkifyFilter, skip_tags=["pre"], - parse_email=False, url_re=sanitize_url_regex)] - ).clean(sanitized) + sanitized = bleach_body_html(sanitized) #doing this here cuz of the linkifyfilter right above it (therefore unifying all link processing logic) soup = BeautifulSoup(sanitized, 'lxml') @@ -674,6 +599,8 @@ def sanitize(sanitized, golden=True, limit_pings=0, showmore=False, count_emojis if sanitized.count("blur(") + sanitized.count("drop-shadow(") > allowed_count: return error("Max 5 usages of 'blur' and 'drop-shadow'!") + sanitized = bleach_body_html(sanitized) + return sanitized.strip() def allowed_attributes_emojis(tag, name, value):