def build_url_re(tlds, protocols):
    """Build the URL-matching regex used by the linkifier.

    If you want a different set of TLDs or allowed protocols, pass those in
    and stomp on the existing ``url_re``::

        from bleach import linkifier

        my_url_re = linkifier.build_url_re(my_tlds_list, my_protocols)

        linker = LinkifyFilter(url_re=my_url_re)

    Args:
        tlds: Iterable of top-level-domain strings (without the leading
            dot). They are sorted and joined with ``|`` into the pattern
            unescaped, so each entry must itself be regex-safe.
        protocols: Iterable of scheme names (e.g. ``"http"``). Sorted and
            joined into the pattern the same way as ``tlds``.

    Returns:
        A compiled pattern (``IGNORECASE | VERBOSE | UNICODE``) matching an
        optional run of opening parentheses, an optional
        ``scheme:/`` + credentials prefix, a dotted host ending in one of
        ``tlds``, an optional ``:port`` and an optional path/query.

    NOTE(review): the pattern text in the reviewed copy was truncated to
    ``\\b(?"]*)?``, which ``re.compile`` rejects. It has been restored from
    upstream bleach's ``linkifier.build_url_re``; confirm against the
    repository that no project-specific change to the path character class
    was lost.
    """
    # ``{0}`` / ``{1}`` are str.format placeholders for the protocol and TLD
    # alternations; literal braces in the regex are doubled (``{{`` / ``}}``)
    # so that format() leaves them alone.
    pattern = r"""\(*  # Match any opening parentheses.
        \b(?<![@.])(?:(?:{0}):/{{0,3}}(?:(?:\w+:)?\w+@)?)?  # http://
        ([\w-]+\.)+(?:{1})(?:\:[0-9]+)?(?!\.\w)\b   # xx.yy.tld(:##)?
        (?:[/?][^\s\{{\}}\|\\\^\[\]`<>"]*)?
            # /path/zz (excluding "unsafe" chars from RFC 1738,
            # except for # and ~, which happen in practice)
        """.format(
        "|".join(sorted(protocols)), "|".join(sorted(tlds))
    )
    return re.compile(pattern, re.IGNORECASE | re.VERBOSE | re.UNICODE)