Improve slur replacer performance, part deux.

h/t @official-techsupport again for finding another optimization.
We are now cumulatively at about a 70% speedup over the original.
It remains one of the hottest paths in the codebase in relative
terms, but its absolute performance demands have decreased enough
to buy us potentially substantial time before we need to revisit it.
Snakes 2022-06-07 21:56:14 -04:00
parent da82ea81d8
commit ed240735fb
1 changed file with 6 additions and 6 deletions

@@ -899,17 +899,17 @@ email_regex = re.compile('[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,}', flags=re.A|re.
 utm_regex = re.compile('utm_[a-z]+=[a-z0-9_]+&', flags=re.A)
 utm_regex2 = re.compile('[?&]utm_[a-z]+=[a-z0-9_]+', flags=re.A)
-slur_regex = re.compile(f"(<[^>]*>)|({single_words})", flags=re.I|re.A)
-slur_regex_upper = re.compile(f"(<[^>]*>)|({single_words.upper()})", flags=re.A)
+slur_regex = re.compile(f"<[^>]*>|{single_words}", flags=re.I|re.A)
+slur_regex_upper = re.compile(f"<[^>]*>|{single_words.upper()}", flags=re.A)
 torture_regex = re.compile('(^|\s)(i|me) ', flags=re.I|re.A)
 torture_regex2 = re.compile("(^|\s)i'm ", flags=re.I|re.A)
 torture_regex_exclude = re.compile('^\s*>', flags=re.A)
 def sub_matcher(match, upper=False):
-    if match.group(1):
-        return match.group(1)
-    else: # implies match.group(2)
-        repl = SLURS[match.group(2).lower()]
+    if match.group(0).startswith('<'):
+        return match.group(0)
+    else:
+        repl = SLURS[match.group(0).lower()]
     return repl if not upper else repl.upper()
 def sub_matcher_upper(match):
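
For context on what the change buys: the diff drops the two capture groups from the alternation and has the matcher look at the whole match (group 0) instead, checking for a leading '<' to recognize the HTML-tag branch. Below is a rough, standalone sketch (not part of the repo) of how that effect might be measured. The SLURS contents, sample text, and timing harness are all made up; only the regex shapes and matcher logic mirror the diff above.

import re
import timeit

SLURS = {"badword": "rainbow"}  # stand-in for the real SLURS mapping
single_words = "|".join(map(re.escape, SLURS))

# Old shape: capture groups around both alternatives.
old_regex = re.compile(f"(<[^>]*>)|({single_words})", flags=re.I | re.A)
# New shape: same alternation, no capture groups.
new_regex = re.compile(f"<[^>]*>|{single_words}", flags=re.I | re.A)

def old_matcher(match):
    # Old approach: inspect capture group 1 (the HTML-tag alternative).
    if match.group(1):
        return match.group(1)
    return SLURS[match.group(2).lower()]

def new_matcher(match):
    # New approach: no groups; peek at the start of the whole match instead.
    if match.group(0).startswith('<'):
        return match.group(0)
    return SLURS[match.group(0).lower()]

text = '<a href="https://example.com">link</a> Badword lorem ipsum ' * 200

# Both variants should produce identical output.
assert old_regex.sub(old_matcher, text) == new_regex.sub(new_matcher, text)

print("old:", timeit.timeit(lambda: old_regex.sub(old_matcher, text), number=500))
print("new:", timeit.timeit(lambda: new_regex.sub(new_matcher, text), number=500))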