Fix timeout in sanitize from link_fix_regex.
h/t to @official-techsupport for finding and helping to fix this bug. When given certain pathological input, `sanitize` would time out (notably only on posts, rather than comments, perhaps due to the longer maximum length of input). For example, using as input the result of:

    with open("test.txt", "w") as f:
        for i in range(26):
            f.write(f":{chr(ord('a') + i)}: ")
        f.write('x' * 20_000)

We believe this to be because of some combination of the greedy quantifiers and the negative lookahead before the match. The regex was rewritten to (in theory) have much more linear performance.
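A minimal sketch of how the slowdown can be reproduced outside the site code, assuming only the standard library; the two patterns are copied from the diff below, while the timing harness and printed labels are illustrative additions, not part of this commit:

    import re
    import time

    # Old and new patterns, as they appear in the diff below.
    OLD = re.compile(r"(?!.*(http|\/))(.*\[[^\]]+\]\()([^)]+\))", flags=re.A)
    NEW = re.compile(r"(\[.*?\]\()(?!http|/)(.*?\))", flags=re.A)

    # Pathological input from the commit message: 26 emoji tokens followed by a long run of 'x'.
    text = "".join(f":{chr(ord('a') + i)}: " for i in range(26)) + "x" * 20_000

    def time_sub(pattern, repl):
        start = time.perf_counter()
        pattern.sub(repl, text)
        return time.perf_counter() - start

    # The old pattern appears to re-scan the remainder of the string for every
    # start position (negative lookahead plus greedy '.*'), so its cost grows
    # roughly quadratically with input length; the new pattern stays near-linear.
    print("old:", time_sub(OLD, r"\2https://\3"))  # expect this to be very slow
    print("new:", time_sub(NEW, r"\1https://\2"))  # expect this to be fast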
parent 70c2b2cffa
commit aaf718c78c
@@ -76,7 +76,7 @@ yt_id_regex = re.compile('[a-z0-9-_]{5,20}', flags=re.I|re.A)

 image_regex = re.compile("(^|\s)(https:\/\/[\w\-.#&/=\?@%;+,:]{5,250}(\.png|\.jpg|\.jpeg|\.gif|\.webp)(\?[\w\-.#&/=\?@%;+,:]*)?)($|\s)", flags=re.I|re.A)

-link_fix_regex = re.compile("(?!.*(http|\/))(.*\[[^\]]+\]\()([^)]+\))", flags=re.A)
+link_fix_regex = re.compile("(\[.*?\]\()(?!http|/)(.*?\))", flags=re.A)

 css_regex = re.compile('https?:\/\/[\w:~,()\-.#&\/=?@%;+]*', flags=re.I|re.A)

@@ -182,7 +182,7 @@ def sanitize(sanitized, edit=False):

 	sanitized = image_check_regex.sub(r'\1', sanitized)

-	sanitized = link_fix_regex.sub(r'\2https://\3', sanitized)
+	sanitized = link_fix_regex.sub(r'\1https://\2', sanitized)

 	sanitized = markdown(sanitized)
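A quick sanity check of the rewritten pattern together with the renumbered group references; the sample links are illustrative, while the pattern and replacement string are the ones introduced above:

    import re

    link_fix_regex = re.compile(r"(\[.*?\]\()(?!http|/)(.*?\))", flags=re.A)

    # Bare markdown links get an https:// prefix: group 1 is "[text](", group 2 is "url)".
    print(link_fix_regex.sub(r"\1https://\2", "[example](example.com/page)"))
    # -> [example](https://example.com/page)

    # Links that already start with http(s) or / are skipped by the negative lookahead.
    print(link_fix_regex.sub(r"\1https://\2", "[example](https://example.com)"))
    # -> [example](https://example.com)
    print(link_fix_regex.sub(r"\1https://\2", "[rel](/some/page)"))
    # -> [rel](/some/page)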