# MarseyWorld/files/helpers/bleach_body.py
#
# 104 lines
# 3.5 KiB
# Python

import bleach
from bleach.css_sanitizer import CSSSanitizer
from bleach.linkifier import LinkifyFilter
import functools
from files.helpers.regex import sanitize_url_regex, excessive_css_scale_regex
from files.helpers.config.const import *
# Tags that bleach_body_html keeps in every sanitized body.
allowed_tags = (
    'a', 'alpha', 'audio', 'b', 'blink', 'blockquote', 'br',
    'center', 'code', 'del', 'details', 'em', 'g', 'gl',
    'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'hr',
    'i', 'img', 'li', 'lite-youtube', 'marquee', 'ol', 'p', 'pre',
    'rp', 'rt', 'ruby', 'small', 'span', 'spoiler', 'strike', 'strong',
    'sub', 'summary', 'sup',
    'table', 'tbody', 'td', 'th', 'thead', 'tr',
    'u', 'ul', 'video',
)

# Extra tags that bleach_body_html keeps only when called with runtime=True.
# allowed_attributes accepts every attribute on these tags.
allowed_tags_runtime = (
    'div',
    'input',
    'label',
    'score',
    'button',
    'd',
)

# CSS properties handed to the CSSSanitizer used for inline `style` values.
allowed_css_properties = (
    'background-color',
    'color',
    'filter',
    'font-weight',
    'text-align',
    'transform',
    'font-variant-caps',
)
def allowed_attributes(tag, name, value):
    """Decide whether attribute ``name`` (with ``value``) may stay on ``tag``.

    This function is handed to ``bleach.Cleaner`` as its ``attributes``
    callable by ``bleach_body_html``.

    Args:
        tag: Name of the element the attribute belongs to.
        name: Attribute name.
        value: Attribute value as a string.

    Returns:
        True when the attribute is accepted, False when it must be dropped.
        Any tag/attribute combination not matched by a rule below is
        rejected, except that every attribute is accepted on a tag listed
        in ``allowed_tags_runtime``.
    """
    # `style` is handled independently of the tag: it is accepted unless it
    # combines `transform` and `scale` in a way the excessive-scale regex
    # matches. The comparison is done on the lower-cased value.
    if name == 'style':
        value = value.lower()
        if 'transform' in value and 'scale' in value and excessive_css_scale_regex.search(value):
            return False
        return True

    if tag == 'marquee':
        if name in {'direction', 'behavior', 'scrollamount'}:
            return True
        if name in {'height', 'width'}:
            # Strip "px" and require an integer in the range 1..250.
            # Only ValueError is caught: that is what int() raises for a
            # non-numeric string, and a bare except would also swallow
            # KeyboardInterrupt / SystemExit.
            try:
                size = int(value.replace('px', ''))
            except ValueError:
                return False
            if 0 < size <= 250:
                return True
            # Out-of-range sizes fall through to the final checks.

    if tag == 'a':
        # Links containing a backslash or a punycode ("xn--") label are refused.
        if name == 'href' and '\\' not in value and 'xn--' not in value:
            return True
        if name == 'rel' and value == 'nofollow noopener':
            return True
        if name == 'target' and value == '_blank':
            return True

    if tag == 'img':
        if name in {'src', 'data-src'}:
            return is_safe_url(value)
        if name == 'loading' and value == 'lazy':
            return True
        if name == 'data-bs-toggle' and value == 'tooltip':
            return True
        # Value-less marker attributes.
        if name in {'g', 'b', 'alpha', 'glow', 'party'} and not value:
            return True
        if name in {'alt', 'title'}:
            return True
        if name == 'class' and value == 'img':
            return True
        if name == 'data-user-submitted' and not value:
            return True

    if tag == 'lite-youtube':
        if name in {'params', 'videoid'}:
            return True

    if tag == 'video':
        if name == 'controls' and value == '':
            return True
        if name == 'preload' and value == 'none':
            return True
        if name in {'src', 'poster'}:
            return is_safe_url(value)

    if tag == 'audio':
        if name == 'src':
            return is_safe_url(value)
        if name == 'controls' and value == '':
            return True
        if name == 'preload' and value == 'none':
            return True

    if tag == 'p':
        if name == 'class' and value in {'mb-0', 'resizable', 'yt', 'text-center'}:
            return True

    if tag == 'span':
        if name == 'data-bs-toggle' and value == 'tooltip':
            return True
        if name in {'title', 'alt', 'id'}:
            return True
        # Value-less marker attributes.
        if name in {'cide', 'bounce'} and not value:
            return True

    if tag == 'table':
        if name == 'class' and value == 'table':
            return True

    if tag == 'blockquote':
        if name == 'class' and value == 'twitter-tweet':
            return True
        if name == 'data-dnt' and value == 'true':
            return True

    # Runtime-only tags accept every attribute.
    if tag in allowed_tags_runtime:
        return True

    return False
def bleach_body_html(body_html, runtime=False):
    """Run ``body_html`` through bleach and return the cleaned markup.

    Args:
        body_html: HTML string to sanitize.
        runtime: When truthy, the tags in ``allowed_tags_runtime`` are
            accepted in addition to ``allowed_tags``.

    Returns:
        The result of ``bleach.Cleaner.clean`` for ``body_html``.
    """
    # Tuple concatenation builds a new tuple; the module-level ones are untouched.
    permitted_tags = allowed_tags + allowed_tags_runtime if runtime else allowed_tags

    # Linkify using the project's URL regex, leaving <pre>/<code> content
    # alone and not turning e-mail addresses into links.
    linkify = functools.partial(
        LinkifyFilter,
        skip_tags=["pre","code"],
        parse_email=False,
        url_re=sanitize_url_regex,
    )

    cleaner = bleach.Cleaner(
        tags=permitted_tags,
        attributes=allowed_attributes,
        protocols=['http', 'https'],
        css_sanitizer=CSSSanitizer(allowed_css_properties=allowed_css_properties),
        filters=[linkify],
    )
    return cleaner.clean(body_html)