From 3eb788103c1de05c503a4645a640c66faa526a6b Mon Sep 17 00:00:00 2001
From: Aevann1
Date: Sun, 19 Jun 2022 19:25:55 +0200
Subject: [PATCH] allow only ascii characters in links (https://rdrama.net/comment/2150032)

---
 files/helpers/const.py    | 2 ++
 files/helpers/sanitize.py | 5 +++--
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/files/helpers/const.py b/files/helpers/const.py
index b8efdc00a..1c617ed6a 100644
--- a/files/helpers/const.py
+++ b/files/helpers/const.py
@@ -1017,4 +1017,6 @@ linefeeds_regex = re.compile("([^\n])\n([^\n])", flags=re.A)
 
 greentext_regex = re.compile("(\n|^)>([^ ][^\n]*)", flags=re.A)
 
+ascii_only_regex = re.compile("[ -~]+", flags=re.A)
+
 def make_name(*args, **kwargs): return request.base_url
\ No newline at end of file
diff --git a/files/helpers/sanitize.py b/files/helpers/sanitize.py
index b1b7cbf74..12cc03d0d 100644
--- a/files/helpers/sanitize.py
+++ b/files/helpers/sanitize.py
@@ -54,7 +54,8 @@ def allowed_attributes(tag, name, value):
 		return False
 
 	if tag == 'a':
-		if name == 'href' and '\\' not in value: return True
+		if name == 'href' and '\\' not in value and 'xn--' not in value:
+			return True
 		if name == 'rel' and value == 'nofollow noopener noreferrer': return True
 		if name == 'target' and value == '_blank': return True
 		return False
@@ -107,7 +108,7 @@ def callback(attrs, new=False):
 	href = attrs[(None, "href")]
 
 	# \ in href right after / makes most browsers ditch site hostname and allows for a host injection bypassing the check, see cool
-	if "\\" in href:
+	if "\\" in href or not ascii_only_regex.fullmatch(href):
 		attrs["_text"] = href # Laugh at this user
 		del attrs[(None, "href")] # Make unclickable and reset harmful payload
 		return attrs