fix the TLD matching to exclude numbers (#371)

master
official-techsupport 2022-09-25 01:06:53 +03:00 committed by GitHub
parent f658970481
commit bd8b96c1f5
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed file with 1 addition and 1 deletion

View File

@@ -95,7 +95,7 @@ def build_url_re(protocols):
return re.compile( return re.compile(
r"""\(*# Match any opening parentheses. r"""\(*# Match any opening parentheses.
\b(?<![@.])(?:(?:{0}):/{{0,3}}(?:(?:\w+:)?\w+@)?)?# http:// \b(?<![@.])(?:(?:{0}):/{{0,3}}(?:(?:\w+:)?\w+@)?)?# http://
([\w-]+\.)+(?:\w{{2,20}})(?:\:[0-9]+)?(?!\.\w)\b# xx.yy.tld(:##)? ([\w-]+\.)+(?:[A-Za-z]{{2,20}})(?:\:[0-9]+)?(?!\.\w)\b# xx.yy.tld(:##)?
(?:[/?][^#\s\{{\}}\|\\\^\[\]`<>"]*)? (?:[/?][^#\s\{{\}}\|\\\^\[\]`<>"]*)?
# /path/zz (excluding "unsafe" chars from RFC 1738, # /path/zz (excluding "unsafe" chars from RFC 1738,
# except for ~, which happens in practice) # except for ~, which happens in practice)