allow only ascii characters in links (https://rdrama.net/comment/2150032)

master
Aevann1 2022-06-19 19:25:55 +02:00
parent 14ffb39ba6
commit 3eb788103c
2 changed files with 5 additions and 2 deletions

View File

@@ -1017,4 +1017,6 @@ linefeeds_regex = re.compile("([^\n])\n([^\n])", flags=re.A)
greentext_regex = re.compile("(\n|^)>([^ ][^\n]*)", flags=re.A) greentext_regex = re.compile("(\n|^)>([^ ][^\n]*)", flags=re.A)
ascii_only_regex = re.compile("[ -~]+", flags=re.A)
def make_name(*args, **kwargs): return request.base_url def make_name(*args, **kwargs): return request.base_url

View File

@@ -54,7 +54,8 @@ def allowed_attributes(tag, name, value):
return False return False
if tag == 'a': if tag == 'a':
if name == 'href' and '\\' not in value: return True if name == 'href' and '\\' not in value and 'xn--' not in value:
return True
if name == 'rel' and value == 'nofollow noopener noreferrer': return True if name == 'rel' and value == 'nofollow noopener noreferrer': return True
if name == 'target' and value == '_blank': return True if name == 'target' and value == '_blank': return True
return False return False
@@ -107,7 +108,7 @@ def callback(attrs, new=False):
href = attrs[(None, "href")] href = attrs[(None, "href")]
# \ in href right after / makes most browsers ditch site hostname and allows for a host injection bypassing the check, see <a href="/\google.com">cool</a> # \ in href right after / makes most browsers ditch site hostname and allows for a host injection bypassing the check, see <a href="/\google.com">cool</a>
if "\\" in href: if "\\" in href or not ascii_only_regex.fullmatch(href):
attrs["_text"] = href # Laugh at this user attrs["_text"] = href # Laugh at this user
del attrs[(None, "href")] # Make unclickable and reset harmful payload del attrs[(None, "href")] # Make unclickable and reset harmful payload
return attrs return attrs