From ed240735fb6b31253b5ea6ebc39c6826ea2acc77 Mon Sep 17 00:00:00 2001 From: TLSM Date: Tue, 7 Jun 2022 21:56:14 -0400 Subject: [PATCH] Improve slur replacer performance, part deux. h/t @official-techsupport again for finding another optimization. We are now cumulatively at about a 70% speedup over the original. It remains one of the hottest paths of the codebase in relative terms, but its absolute performance demands have decreased enough to buy us potentially substantial time on it. --- files/helpers/const.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/files/helpers/const.py b/files/helpers/const.py index be60601f6..2ab1e56b7 100644 --- a/files/helpers/const.py +++ b/files/helpers/const.py @@ -899,17 +899,17 @@ email_regex = re.compile('[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,}', flags=re.A|re. utm_regex = re.compile('utm_[a-z]+=[a-z0-9_]+&', flags=re.A) utm_regex2 = re.compile('[?&]utm_[a-z]+=[a-z0-9_]+', flags=re.A) -slur_regex = re.compile(f"(<[^>]*>)|({single_words})", flags=re.I|re.A) -slur_regex_upper = re.compile(f"(<[^>]*>)|({single_words.upper()})", flags=re.A) +slur_regex = re.compile(f"<[^>]*>|{single_words}", flags=re.I|re.A) +slur_regex_upper = re.compile(f"<[^>]*>|{single_words.upper()}", flags=re.A) torture_regex = re.compile('(^|\s)(i|me) ', flags=re.I|re.A) torture_regex2 = re.compile("(^|\s)i'm ", flags=re.I|re.A) torture_regex_exclude = re.compile('^\s*>', flags=re.A) def sub_matcher(match, upper=False): - if match.group(1): - return match.group(1) - else: # implies match.group(2) - repl = SLURS[match.group(2).lower()] + if match.group(0).startswith('<'): + return match.group(0) + else: + repl = SLURS[match.group(0).lower()] return repl if not upper else repl.upper() def sub_matcher_upper(match):