From fae0bee0d93302259453e8a778467076d81958df Mon Sep 17 00:00:00 2001 From: Aevann1 Date: Fri, 17 Jun 2022 22:37:27 +0200 Subject: [PATCH] Refactored mention sanitization (stolen commit from themotte) --- files/helpers/get.py | 25 ++++++++++++++++++++++++- files/helpers/sanitize.py | 28 +++++++++++++++++----------- 2 files changed, 41 insertions(+), 12 deletions(-) diff --git a/files/helpers/get.py b/files/helpers/get.py index 59b30df6c2..3a9ee539cb 100644 --- a/files/helpers/get.py +++ b/files/helpers/get.py @@ -63,6 +63,29 @@ def get_user(username, v=None, graceful=False): return user +def get_users(usernames, v=None, graceful=False): + if not usernames: + if not graceful: abort(404) + else: return [] + + def clean(n): + return n.replace('\\', '').replace('_', '\_').replace('%', '').strip() + + usernames = [ clean(n) for n in usernames ] + + users = g.db.query(User).filter( + or_( + User.username == any_(usernames), + User.original_username == any_(usernames) + ) + ).all() + + if not users: + if not graceful: abort(404) + else: return [] + + return users + def get_account(id, v=None): try: id = int(id) @@ -286,4 +309,4 @@ def get_domain(s): doms = sorted(doms, key=lambda x: len(x.domain), reverse=True) - return doms[0] \ No newline at end of file + return doms[0] diff --git a/files/helpers/sanitize.py b/files/helpers/sanitize.py index c51156abb6..638c510624 100644 --- a/files/helpers/sanitize.py +++ b/files/helpers/sanitize.py @@ -181,24 +181,30 @@ def sanitize(sanitized, alert=False, edit=False): sanitized = sanitized.replace('‎','').replace('​','').replace("\ufeff", "").replace("𒐪","") if alert: - captured = [] - for i in mention_regex2.finditer(sanitized): - if i.group(0) in captured: continue - captured.append(i.group(0)) + matches = { g.group(1):g for g in mention_regex2.finditer(sanitized) if g } + users = get_users(matches.keys(),graceful=True) - u = get_user(i.group(1), graceful=True) + captured = [] + for u in users: if u: - sanitized = sanitized.replace(i.group(0), f'''

@{u.username}''') + i = matches.get(u.username) or matches.get(u.original_username) + if i.group(0) not in captured: + captured.append(i.group(0)) + sanitized = sanitized.replace(i.group(0), f'''

@{u.username}''') else: sanitized = reddit_regex.sub(r'\1/\2', sanitized) - sanitized = sub_regex.sub(r'\1/\2', sanitized) - for i in mention_regex.finditer(sanitized): - u = get_user(i.group(2), graceful=True) + matches = [ m for m in mention_regex.finditer(sanitized) if m ] + names = set( m.group(2) for m in matches ) + users = get_users(names,graceful=True) - if u and (not (g.v and g.v.any_block_exists(u)) or g.v.admin_level > 1): - sanitized = sanitized.replace(i.group(0), f'''{i.group(1)}@{u.username}''', 1) + for u in users: + if not u: continue + m = [ m for m in matches if u.username == m.group(2) or u.original_username == m.group(2) ] + for i in m: + if not (g.v and g.v.any_block_exists(u)) or g.v.admin_level > 1: + sanitized = sanitized.replace(i.group(0), f'''{i.group(1)}@{u.username}''', 1) sanitized = normalize_url(sanitized)