From 67e531c4e4fc2af718a3de7415debf4158ec33a1 Mon Sep 17 00:00:00 2001 From: transbitch <> Date: Sun, 10 Sep 2023 14:26:09 -0400 Subject: [PATCH 1/8] Merge shit idk --- files/helpers/regex.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/files/helpers/regex.py b/files/helpers/regex.py index 35dbc4994..9b87436b7 100644 --- a/files/helpers/regex.py +++ b/files/helpers/regex.py @@ -118,7 +118,15 @@ twitch_regex = re.compile('(https:\/\/)?(www\.)?twitch.tv\/(.*)', flags=re.I|re. link_fix_regex = re.compile("(\[.*?\]\()(?!http|\/)(.*?\))" + NOT_IN_CODE_OR_LINKS, flags=re.A) -css_url_regex = re.compile('url\([\'"]?((.|\n)*?)[",);}$]', flags=re.I|re.A) # AEVANN, DO NOT TOUCH THIS, IT WENT THROUGH A MILLION ITERATIONS, IT'S PERFECT NOW +css_url_regex = re.compile('url\([\'"]?((.|\n)*?)[",);}$]', flags=re.I|re.A) # AEVANN, DO NOT TOUCH THIS, IT WENT THROUGH A MILLION ITERATIONS, IT'S PERFECT NOW <-- you probably dont actually need this anymore lol (CSP covers it) +css_style_attr_regex = re.compile('\s*([\w-]+?)\s*:((".*?"|\'.*?\'|\(.*?\)|{.*?}|\[.*?]|[^;])*);?', flags=re.I|re.A) +""" +CSS style attribute regex explanation: +Each match is one declaration. (Example: "color: red;") +Capture groups: +1. The property name (Example: "color") +2. The value, excluding the trailing ";", but including whitespace (Example: " red") +""" linefeeds_regex = re.compile("([^\n])\n([^\n])", flags=re.A) -- 2.34.1 From 25a33b5538a6f50e324a218ed189b360549e29b7 Mon Sep 17 00:00:00 2001 From: transbitch <> Date: Sun, 17 Sep 2023 23:01:40 -0400 Subject: [PATCH 2/8] Wrote a lot of MarseyFX --- files/helpers/marseyfx/modifiers.py | 107 +++++++++++++++ files/helpers/marseyfx/parser.py | 107 +++++++++++++++ files/helpers/marseyfx/tokenizer.py | 198 ++++++++++++++++++++++++++++ files/helpers/regex.py | 2 + files/helpers/sanitize.py | 45 +++---- 5 files changed, 432 insertions(+), 27 deletions(-) create mode 100644 files/helpers/marseyfx/modifiers.py create mode 100644 files/helpers/marseyfx/parser.py create mode 100644 files/helpers/marseyfx/tokenizer.py diff --git a/files/helpers/marseyfx/modifiers.py b/files/helpers/marseyfx/modifiers.py new file mode 100644 index 000000000..37c44f993 --- /dev/null +++ b/files/helpers/marseyfx/modifiers.py @@ -0,0 +1,107 @@ +from bs4 import BeautifulSoup, Tag +from files.helpers.config.const import SITE_FULL_IMAGES +from files.helpers.marseyfx.parser import Modifier +from files.helpers.marseyfx.tokenizer import StringLiteralToken + +modifier_whitelist = [] + +def modifier(fn): + modifier_whitelist.append(fn.__name__) + + def wrapper(*args, **kwargs): + args[0].el['class'].append('marseyfx-modifier-' + fn.__name__) + return fn(*args, **kwargs) + return wrapper + +class Modified: + soup: BeautifulSoup + el: Tag #BeautifulSoup element + + def __init__(self, el): + self.soup = BeautifulSoup() + self.el = el.wrap(self.soup.new_tag('div', class_='marseyfx-container')) + + def add_class(self, class_: str): + self.el.attrs['class'].append(' ' + class_) + + def apply_modifiers(self, modifiers: list[Modifier]): + for modifier in modifiers: + if modifier.name in modifier_whitelist: + getattr(self, modifier.name)(*modifier.args) + + # Using this instead of throwing everything in a string and then parsing it helps + # mitigate the risk of XSS attacks + def image(self, name: str): + return self.soup.new_tag( + 'img', + loading='lazy', + class_=f'marseyfx-{name}', + src=f'{SITE_FULL_IMAGES}/i/{name}.webp' + ) + + def underlay(self, underlay: Tag): + self.el.insert(0, underlay) 
+ + def overlay(self, overlay: Tag): + self.el.append(overlay) + + @modifier + def pat(self): + self.overlay(self.el, self.image('pat')) + + @modifier + def love(self): + self.overlay(self.el, self.image('love-foreground')) + self.underlay(self.el, self.image('love-background')) + + @modifier + def talking(self): + self.overlay(self.el, self.image('talking')) + + @modifier + def genocide(self): + pass + + @modifier + def says(self, msg): + if not isinstance(msg, StringLiteralToken): + return + + self.overlay(self.el, self.image('says')) + self.el.append(self.soup.new_tag( + 'span', + class_='marseyfx-modifier-says-text', + string=msg.value + )) + + @modifier + def fallover(self): + self.el.wrap(self.soup.new_tag( + 'div', + class_='marseyfx-modifier-fallover-container' + )) + + @modifier + def transform(self, transformstyle: str): + if not transformstyle.fullmatch(r'[\w()\s%\.]*'): + return + + if not 'style' in self.el.attrs: + self.el.attrs['style'] = '' + + self.el.attrs['style'] += f'transform: {transformstyle};' + + @modifier + def enraged(self): + self.underlay(self.soup.new_tag( + 'div', + class_='marseyfx-enraged-underlay' + )) + + @modifier + def corrupted(self): + pass + + @modifier + def wavy(self): + self.el.wrap(self.soup.new_tag('svg')) \ No newline at end of file diff --git a/files/helpers/marseyfx/parser.py b/files/helpers/marseyfx/parser.py new file mode 100644 index 000000000..04f936da2 --- /dev/null +++ b/files/helpers/marseyfx/parser.py @@ -0,0 +1,107 @@ +from tokenize import Token + +from bs4 import BeautifulSoup +from files.helpers.config.const import SITE_FULL_IMAGES +from files.helpers.marseyfx.tokenizer import ArgsToken, DotToken, GroupToken, Tokenizer, WordToken +from modified import Modified + +class Modifier: + name: str + args: list[Token] + + def __init__(self, name: str, args: list[Token]): + self.name = name + self.args = args + +emoji_replacers = { + '!': 'is_flipped', + '#': 'is_big', + '@': 'is_user' +} + +class Emoji: + name: str + token: Token + is_big = False + is_flipped = False + is_user = False + modifiers: list[Modifier] + + def __init__(self, name: str, modifiers, token: Token): + for symbol, value in emoji_replacers.items(): + name = name.replace(symbol, '') + setattr(self, value, True) + + self.name = name + self.modifiers = modifiers + self.token = token + def create_el(self): + soup = BeautifulSoup() + + el = soup.new_tag( + 'img', + loading='lazy', + class_='marseyfx-emoji', + src=f'{SITE_FULL_IMAGES}/e/{self.name}.webp' + ) + + if (self.is_big): + el['class'].append(' marseyfx-big') + + if (self.is_flipped): + el['class'].append(' marseyfx-flipped') + + mod = Modified(el) + mod.apply_modifiers(self.modifiers) + + return mod.el + +def parse_emoji(str: str): + tokenizer = Tokenizer(str) + token = tokenizer.parse_next_tokens() + + if len(tokenizer.errors) > 0: + return False, None, token + + emoji = parse_from_token(tokenizer, token) + + if not emoji: + return False, None, token + + return True, emoji, token + +def parse_from_token(tokenizer: Tokenizer, token: GroupToken): + if not isinstance(token, GroupToken): + tokenizer.error('Malformed token -- Expected a group token') + return + + emoji = token.tokens[0] + + if not isinstance(emoji, WordToken): + tokenizer.error('Malformed token -- Expected an emoji (word token)') + return + + modifiers = [] + + i = 1 + while i + 1 < len(token.tokens): + t = token.tokens[i] + + if not isinstance(t, DotToken): + tokenizer.error('Malformed token -- Expected a dot') + return + + modifier = token.tokens[i + 
1] + if not isinstance(modifier, WordToken): + tokenizer.error('Malformed token -- Expected a modifier name (word token)') + return + + if not i + 2 < len(token.tokens) or not isinstance(token.tokens[i + 2], ArgsToken): + modifiers.append(Modifier(modifier.value, [])) + i += 2 + else: + args = token.tokens[i + 2] + modifiers.append(Modifier(modifier.value, args.tokens)) + i += 3 + + return Emoji(emoji.value, modifiers, token) \ No newline at end of file diff --git a/files/helpers/marseyfx/tokenizer.py b/files/helpers/marseyfx/tokenizer.py new file mode 100644 index 000000000..6a01129f9 --- /dev/null +++ b/files/helpers/marseyfx/tokenizer.py @@ -0,0 +1,198 @@ +from abc import abstractmethod + +class TokenizerError: + index: int + error: str + + def __init__(self, index: int, error: str): + self.index = index + self.error = error + +class Tokenizer: + str: str + index: int + errors: list[TokenizerError] + + def __init__(self, str: str): + self.str = str + self.index = 0 + self.errors = [] + + def has_next(self): + return self.index < len(self.str) + + def peek(self): + return self.str[self.index] + + def eat(self): + c = self.peek() + self.index += 1 + return c + + def barf(self): + self.index -= 1 + + def error(self, error: str): + self.errors.append(TokenizerError(self.index, error)) + + def token_to_string(self, token): + return self.str[token.span[0]:token.span[1]] + + def parse_next_tokens(self): + start = self.index + tokens = [] + while self.has_next(): + if WordToken.can_parse(self): + tokens.append(WordToken.parse(self)) + elif DotToken.can_parse(self): + tokens.append(DotToken.parse(self)) + elif ArgsToken.can_parse(self): + tokens.append(ArgsToken.parse(self)) + else: + break + + return GroupToken((start, self.index), tokens) + +class Token: + span: tuple[int, int] + + @staticmethod + @abstractmethod + def can_parse(tokenizer: Tokenizer) -> bool: + pass + + @staticmethod + @abstractmethod + def parse(tokenizer: Tokenizer): + pass + +class WordToken(Token): + value: str + + def __init__(self, span: tuple[int, int], value: str): + self.value = value + self.span = span + + @staticmethod + def can_parse(tokenizer: Tokenizer): + return tokenizer.peek().fullmatch(r'[!#\w@]') + + @staticmethod + def parse(tokenizer: Tokenizer): + start = tokenizer.index + value = '' + while tokenizer.has_next(): + if WordToken.can_parse(tokenizer): + value += tokenizer.eat() + else: + break + + return WordToken((start, tokenizer.index), value) + +class StringLiteralToken(Token): + value: str + + def __init__(self, span: tuple[int, int], value: str): + self.value = value + self.span = span + + @staticmethod + def can_parse(tokenizer: Tokenizer): + return tokenizer.peek() == '"' + + # i was cuddling with my fwb while writing this ;3 + @staticmethod + def parse(tokenizer: Tokenizer): + start = tokenizer.index + tokenizer.eat() + value = '' + next_escaped = False + while tokenizer.has_next(): + if tokenizer.peek() == '"' and not next_escaped: + tokenizer.eat() + break + elif tokenizer.peek() == '\\' and not next_escaped: + next_escaped = True + tokenizer.eat() + else: + value += tokenizer.eat() + next_escaped = False + + return StringLiteralToken((start, tokenizer.index), value) + +class NumberLiteralToken(Token): + value: float + + def __init__(self, span: tuple[int, int], value: float): + self.value = value + self.span = span + + @staticmethod + def can_parse(tokenizer: Tokenizer): + return tokenizer.peek().fullmatch(r'[-\d\.]') + + @staticmethod + def parse(tokenizer: Tokenizer): + start = tokenizer.index 
+ value = '' + while tokenizer.has_next(): + if NumberLiteralToken.can_parse(tokenizer): + value += tokenizer.eat() + else: + break + + try: + value = float(value) + except ValueError: + tokenizer.error('Invalid number literal') + value = 0.0 + + return NumberLiteralToken((start, tokenizer.index), value) + + def get_float(self): + return float(self.value) + +class DotToken(Token): + def __init__(self, span: tuple[int, int]): + self.span = span + + @staticmethod + def can_parse(tokenizer: Tokenizer): + return tokenizer.peek() == '.' + + @staticmethod + def parse(tokenizer: Tokenizer): + tokenizer.eat() + return DotToken((tokenizer.index, tokenizer.index + 1)) + +class GroupToken(Token): + children: list[Token] + + def __init__(self, span: tuple[int, int], children: list[Token]): + self.children = children + self.span = span + +class ArgsToken(Token): + children: list[GroupToken] + def __init__(self, span: tuple[int, int], children: list[Token]): + self.children = children + self.span = span + + @staticmethod + def can_parse(tokenizer: Tokenizer): + return tokenizer.peek() == '(' + + @staticmethod + def parse(tokenizer: Tokenizer): + start = tokenizer.index + tokens = [] + while tokenizer.has_next(): + if tokenizer.peek() == ')': + tokenizer.eat() + break + elif tokenizer.peek() == ',': + tokenizer.eat() + else: + tokens.append(tokenizer.parse_next_tokens()) + + return ArgsToken((start, tokenizer.index), tokens) \ No newline at end of file diff --git a/files/helpers/regex.py b/files/helpers/regex.py index 9b87436b7..d0b0f16ea 100644 --- a/files/helpers/regex.py +++ b/files/helpers/regex.py @@ -52,6 +52,8 @@ mute_regex = re.compile("\/mute @?([\w-]{1,30}) ([0-9]+)", flags=re.A|re.I) emoji_regex = re.compile(f"
<p>
\s*(:[!#@\w\-]{{1,72}}:\s*)+<\/p>", flags=re.A) emoji_regex2 = re.compile(f'(?|[^`]*`))', flags=re.A) +marseyfx_emoji_regex = re.compile(':[^\s].{0,98}?[^\\]:', flags=re.A) + snappy_url_regex = re.compile('(.+?)<\/a>', flags=re.A) email_regex = re.compile('[A-Za-z0-9._%+-]{1,64}@[A-Za-z0-9.-]{2,63}\.[A-Za-z]{2,63}', flags=re.A) diff --git a/files/helpers/sanitize.py b/files/helpers/sanitize.py index 3ed11a8a5..d418b846c 100644 --- a/files/helpers/sanitize.py +++ b/files/helpers/sanitize.py @@ -4,8 +4,10 @@ import re import signal from functools import partial from os import path, listdir +from typing_extensions import deprecated from urllib.parse import parse_qs, urlparse, unquote, ParseResult, urlencode, urlunparse import time +from files.helpers.marseyfx.parser import parse_emoji from sqlalchemy.sql import func @@ -271,8 +273,21 @@ def find_all_emote_endings(word): return endings, word +def render_emojis(markup: str): + emojis_used = set() -def render_emoji(html, regexp, golden, emojis_used, b=False, is_title=False): + for emoji_match in marseyfx_emoji_regex.finditer(markup): + emoji_str = emoji_match.group()[1:-1] # Cut off colons + success, emoji = parse_emoji(emoji_str) + if success: + emojis_used.add(emoji.name) + emoji_html = str(emoji.create_el()) + markup = markup.replace(emoji_match.group(), emoji_html) + + return markup, emojis_used + +@deprecated("Use the new one") +def old_render_emoji(html, regexp, golden, emojis_used, b=False, is_title=False): emojis = list(regexp.finditer(html)) captured = set() @@ -529,29 +544,7 @@ def sanitize(sanitized, golden=True, limit_pings=0, showmore=False, count_emojis sanitized = spoiler_regex.sub(r'\1', sanitized) - emojis_used = set() - - emojis = list(emoji_regex.finditer(sanitized)) - if len(emojis) > 20: golden = False - - captured = [] - for i in emojis: - if i.group(0) in captured: continue - captured.append(i.group(0)) - - old = i.group(0) - if 'marseylong1' in old or 'marseylong2' in old or 'marseylongcockandballs' in old or 'marseyllama1' in old or 'marseyllama2' in old: - new = old.lower().replace(">", " class='mb-0'>") - else: new = old.lower() - - new = render_emoji(new, emoji_regex2, golden, emojis_used, True) - - sanitized = sanitized.replace(old, new) - - emojis = list(emoji_regex2.finditer(sanitized)) - if len(emojis) > 20: golden = False - - sanitized = render_emoji(sanitized, emoji_regex2, golden, emojis_used) + santiized, emojis_used = render_emojis(sanitized) sanitized = sanitized.replace('&','&') @@ -710,9 +703,7 @@ def filter_emojis_only(title, golden=True, count_emojis=False): title = remove_cuniform(title) - emojis_used = set() - - title = render_emoji(title, emoji_regex2, golden, emojis_used, is_title=True) + title, emojis_used = render_emojis(title) #old_render_emoji(title, emoji_regex2, golden, emojis_used, is_title=True) if count_emojis: for emoji in g.db.query(Emoji).filter(Emoji.submitter_id==None, Emoji.name.in_(emojis_used)): -- 2.34.1 From 07448a18f43a18091acc21c34bf86a4f6156eacb Mon Sep 17 00:00:00 2001 From: transbitch <> Date: Tue, 29 Aug 2023 08:55:27 -0400 Subject: [PATCH 3/8] Add pixelated image rendering to Classic emojis --- files/assets/css/main.css | 6 ++++-- files/helpers/sanitize.py | 5 +++++ 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/files/assets/css/main.css b/files/assets/css/main.css index 97696468e..c795b559b 100644 --- a/files/assets/css/main.css +++ b/files/assets/css/main.css @@ -5517,8 +5517,7 @@ input[type=radio] ~ .custom-control-label::before { height: 150px; width: 
150px; } -.emj, .emoji, .bigemoji, .emoji-md, .emoji-lg, img[alt^=":"] -{ +.emj, .emoji, .bigemoji, .emoji-md, .emoji-lg, img[alt^=":"] { max-width: 150px !important; max-height: 150px !important; max-width: min(150px,25vw) !important; @@ -5526,6 +5525,9 @@ input[type=radio] ~ .custom-control-label::before { display: inline-block; object-fit: contain; } +.emoji, .emoji-lg, img[data-kind=Classic] { + image-rendering: pixelated; +} span[data-bs-toggle], .pat-preview { position: relative; diff --git a/files/helpers/sanitize.py b/files/helpers/sanitize.py index d418b846c..044ecea55 100644 --- a/files/helpers/sanitize.py +++ b/files/helpers/sanitize.py @@ -83,6 +83,7 @@ def allowed_attributes(tag, name, value): if name in {'alt','title'}: return True if name == 'class' and value == 'img': return True if name == 'data-user-submitted' and not value: return True + if name == 'data-kind' and value in EMOJI_KINDS: return True if tag == 'lite-youtube': if name == 'params' and value.startswith('autoplay=1&modestbranding=1'): return True @@ -338,6 +339,10 @@ def old_render_emoji(html, regexp, golden, emojis_used, b=False, is_title=False) if(is_loved): modifier_html = f'{modifier_html}{loved_html}' + kind = g.db.query(Emoji.kind).filter(Emoji.name==emoji).one_or_none() + if kind: + attrs += ' data-kind="' + kind[0].replace('"', '') + '"' + if (is_patted and emoji != 'marseyunpettable') or is_talking or is_genocided or is_loved: if path.isfile(f"files/assets/images/emojis/{emoji}.webp"): emoji_html = f'{modifier_html}{emoji_partial_pat.format(old, f"{SITE_FULL_IMAGES}/e/{emoji}.webp", attrs)}' -- 2.34.1 From 59d499237d145c7585655981567a0b9b70f0f57d Mon Sep 17 00:00:00 2001 From: transbitch <> Date: Mon, 18 Sep 2023 01:40:02 -0400 Subject: [PATCH 4/8] transforms are back baby --- files/helpers/config/const.py | 8 ++-- files/helpers/marseyfx/modifiers.py | 62 ++++++++++++++++++----------- files/helpers/marseyfx/parser.py | 56 +++++++++++++------------- files/helpers/marseyfx/tokenizer.py | 16 +++++++- files/helpers/regex.py | 2 +- files/helpers/sanitize.py | 6 +-- 6 files changed, 90 insertions(+), 60 deletions(-) diff --git a/files/helpers/config/const.py b/files/helpers/config/const.py index 83f7b7edb..48ec9caf0 100644 --- a/files/helpers/config/const.py +++ b/files/helpers/config/const.py @@ -54,7 +54,8 @@ DELETE_EDIT_RATELIMIT = "10/minute;50/day" PUSH_NOTIF_LIMIT = 1000 -IS_LOCALHOST = SITE == "localhost" or SITE == "127.0.0.1" or SITE.startswith("192.168.") or SITE.endswith(".local") +IS_LOCALHOST = SITE.startswith("localhost:") or SITE.startswith("127.0.0.1") or SITE.startswith("192.168.") or SITE.endswith(".local") +print(f"IS_LOCALHOST: {IS_LOCALHOST}") if IS_LOCALHOST: SITE_FULL = 'http://' + SITE @@ -1073,9 +1074,10 @@ engine = create_engine(environ.get("DATABASE_URL").strip(), connect_args={"optio db_session = scoped_session(sessionmaker(bind=engine, autoflush=False)) approved_embed_hosts_for_csp = ' '.join(set([x.split('/')[0] for x in approved_embed_hosts])) -csp = f"default-src 'none'; frame-ancestors 'none'; form-action 'self'; manifest-src 'self'; worker-src 'self'; base-uri 'self'; font-src 'self'; style-src-elem 'self'; style-src-attr 'unsafe-inline'; style-src 'self' 'unsafe-inline'; script-src-elem 'self' challenges.cloudflare.com; script-src-attr 'none'; script-src 'self' challenges.cloudflare.com; frame-src challenges.cloudflare.com www.youtube-nocookie.com platform.twitter.com rumble.com player.twitch.tv; connect-src 'self' videos.watchpeopledie.tv use1.fptls.com use1.fptls3.com 
api.fpjs.io; img-src {approved_embed_hosts_for_csp} data:; media-src {approved_embed_hosts_for_csp};" +csp = f"default-src 'none'; frame-ancestors 'none'; form-action 'self'; manifest-src 'self'; worker-src 'self'; base-uri 'self'; font-src 'self'; style-src-elem 'self'; style-src-attr 'unsafe-inline'; style-src 'self' 'unsafe-inline'; script-src-elem 'self' challenges.cloudflare.com; script-src-attr 'none'; script-src 'self' challenges.cloudflare.com; frame-src challenges.cloudflare.com www.youtube-nocookie.com platform.twitter.com rumble.com player.twitch.tv; connect-src 'self' videos.watchpeopledie.tv use1.fptls.com use1.fptls3.com api.fpjs.io; img-src 'self' {approved_embed_hosts_for_csp} data:; media-src 'self' {approved_embed_hosts_for_csp};" if not IS_LOCALHOST: csp += ' upgrade-insecure-requests;' + with open("includes/content-security-policy", "w") as f: - f.write(f'add_header Content-Security-Policy "{csp}";') + f.write(f'add_header Content-Security-Policy "{csp}";') \ No newline at end of file diff --git a/files/helpers/marseyfx/modifiers.py b/files/helpers/marseyfx/modifiers.py index 37c44f993..e8d0623de 100644 --- a/files/helpers/marseyfx/modifiers.py +++ b/files/helpers/marseyfx/modifiers.py @@ -1,15 +1,24 @@ +import re from bs4 import BeautifulSoup, Tag from files.helpers.config.const import SITE_FULL_IMAGES -from files.helpers.marseyfx.parser import Modifier -from files.helpers.marseyfx.tokenizer import StringLiteralToken +from files.helpers.marseyfx.tokenizer import StringLiteralToken, Token modifier_whitelist = [] +class Modifier: + name: str + args: list[Token] + + def __init__(self, name: str, args: list[Token]): + self.name = name + self.args = args + def modifier(fn): modifier_whitelist.append(fn.__name__) def wrapper(*args, **kwargs): - args[0].el['class'].append('marseyfx-modifier-' + fn.__name__) + slf = args[0] + slf.el = slf.el.wrap(slf.soup.new_tag('div', attrs={'class': f'marseyfx-modifier marseyfx-modifier-{fn.__name__}'})) return fn(*args, **kwargs) return wrapper @@ -19,7 +28,7 @@ class Modified: def __init__(self, el): self.soup = BeautifulSoup() - self.el = el.wrap(self.soup.new_tag('div', class_='marseyfx-container')) + self.el = el def add_class(self, class_: str): self.el.attrs['class'].append(' ' + class_) @@ -32,12 +41,20 @@ class Modified: # Using this instead of throwing everything in a string and then parsing it helps # mitigate the risk of XSS attacks def image(self, name: str): - return self.soup.new_tag( + image = self.soup.new_tag( 'img', loading='lazy', - class_=f'marseyfx-{name}', - src=f'{SITE_FULL_IMAGES}/i/{name}.webp' - ) + src=f'{SITE_FULL_IMAGES}/i/{name}.webp', + attrs={'class': f'marseyfx-image marseyfx-image-{name}'} + ) + + container = self.soup.new_tag( + 'div', + attrs={'class': f'marseyfx-image-container marseyfx-image-container-{name}'} + ) + + container.append(image) + return container def underlay(self, underlay: Tag): self.el.insert(0, underlay) @@ -47,16 +64,16 @@ class Modified: @modifier def pat(self): - self.overlay(self.el, self.image('pat')) + self.overlay(self.image('pat')) @modifier def love(self): - self.overlay(self.el, self.image('love-foreground')) - self.underlay(self.el, self.image('love-background')) + self.overlay(self.image('love-foreground')) + self.underlay(self.image('love-background')) @modifier def talking(self): - self.overlay(self.el, self.image('talking')) + self.overlay(self.image('talking')) @modifier def genocide(self): @@ -67,35 +84,34 @@ class Modified: if not isinstance(msg, StringLiteralToken): 
return - self.overlay(self.el, self.image('says')) + self.overlay(self.image('says')) self.el.append(self.soup.new_tag( 'span', - class_='marseyfx-modifier-says-text', - string=msg.value + string=msg.value, + attrs={'class': 'marseyfx-modifier-says-text'} )) @modifier def fallover(self): - self.el.wrap(self.soup.new_tag( + self.el = self.el.wrap(self.soup.new_tag( 'div', - class_='marseyfx-modifier-fallover-container' + attrs={'class': 'marseyfx-modifier-fallover-container'} )) @modifier - def transform(self, transformstyle: str): - if not transformstyle.fullmatch(r'[\w()\s%\.]*'): + def transform(self, transformstyle: StringLiteralToken): + if not re.fullmatch(r'[\w()\s%\.,]*', transformstyle.value): + print(f'Evil transform detected: {transformstyle.value}') return - if not 'style' in self.el.attrs: - self.el.attrs['style'] = '' + self.el.attrs['style'] = f'transform: {transformstyle.value};' - self.el.attrs['style'] += f'transform: {transformstyle};' @modifier def enraged(self): self.underlay(self.soup.new_tag( 'div', - class_='marseyfx-enraged-underlay' + attrs={'class': 'marseyfx-enraged-underlay'} )) @modifier diff --git a/files/helpers/marseyfx/parser.py b/files/helpers/marseyfx/parser.py index 04f936da2..b567fb44c 100644 --- a/files/helpers/marseyfx/parser.py +++ b/files/helpers/marseyfx/parser.py @@ -3,15 +3,7 @@ from tokenize import Token from bs4 import BeautifulSoup from files.helpers.config.const import SITE_FULL_IMAGES from files.helpers.marseyfx.tokenizer import ArgsToken, DotToken, GroupToken, Tokenizer, WordToken -from modified import Modified - -class Modifier: - name: str - args: list[Token] - - def __init__(self, name: str, args: list[Token]): - self.name = name - self.args = args +from files.helpers.marseyfx.modifiers import Modified, Modifier emoji_replacers = { '!': 'is_flipped', @@ -29,41 +21,49 @@ class Emoji: def __init__(self, name: str, modifiers, token: Token): for symbol, value in emoji_replacers.items(): - name = name.replace(symbol, '') - setattr(self, value, True) + if symbol in name: + name = name.replace(symbol, '') + setattr(self, value, True) self.name = name self.modifiers = modifiers self.token = token + def create_el(self): soup = BeautifulSoup() el = soup.new_tag( 'img', loading='lazy', - class_='marseyfx-emoji', - src=f'{SITE_FULL_IMAGES}/e/{self.name}.webp' + src=f'{SITE_FULL_IMAGES}/e/{self.name}.webp', + attrs={'class': f'marseyfx-emoji marseyfx-image'} + ) + soup.append(el) + el = el.wrap( + soup.new_tag('div', attrs={'class': 'marseyfx-emoji-container'}) ) - - if (self.is_big): - el['class'].append(' marseyfx-big') - - if (self.is_flipped): - el['class'].append(' marseyfx-flipped') mod = Modified(el) mod.apply_modifiers(self.modifiers) - return mod.el + container = soup.new_tag('div', attrs={'class': 'marseyfx-container'}) + if (self.is_big): + container['class'].append(' marseyfx-big') + + if (self.is_flipped): + container['class'].append(' marseyfx-flipped') + + return mod.el.wrap(container) def parse_emoji(str: str): tokenizer = Tokenizer(str) token = tokenizer.parse_next_tokens() - if len(tokenizer.errors) > 0: + if len(tokenizer.errors) > 0 or token is None: return False, None, token emoji = parse_from_token(tokenizer, token) + print(f'Here! 
{emoji}') if not emoji: return False, None, token @@ -75,7 +75,7 @@ def parse_from_token(tokenizer: Tokenizer, token: GroupToken): tokenizer.error('Malformed token -- Expected a group token') return - emoji = token.tokens[0] + emoji = token.children[0] if not isinstance(emoji, WordToken): tokenizer.error('Malformed token -- Expected an emoji (word token)') @@ -84,24 +84,24 @@ def parse_from_token(tokenizer: Tokenizer, token: GroupToken): modifiers = [] i = 1 - while i + 1 < len(token.tokens): - t = token.tokens[i] + while i + 1 < len(token.children): + t = token.children[i] if not isinstance(t, DotToken): tokenizer.error('Malformed token -- Expected a dot') return - modifier = token.tokens[i + 1] + modifier = token.children[i + 1] if not isinstance(modifier, WordToken): tokenizer.error('Malformed token -- Expected a modifier name (word token)') return - if not i + 2 < len(token.tokens) or not isinstance(token.tokens[i + 2], ArgsToken): + if not i + 2 < len(token.children) or not isinstance(token.children[i + 2], ArgsToken): modifiers.append(Modifier(modifier.value, [])) i += 2 else: - args = token.tokens[i + 2] - modifiers.append(Modifier(modifier.value, args.tokens)) + args = token.children[i + 2] + modifiers.append(Modifier(modifier.value, args.children)) i += 3 return Emoji(emoji.value, modifiers, token) \ No newline at end of file diff --git a/files/helpers/marseyfx/tokenizer.py b/files/helpers/marseyfx/tokenizer.py index 6a01129f9..82e859b25 100644 --- a/files/helpers/marseyfx/tokenizer.py +++ b/files/helpers/marseyfx/tokenizer.py @@ -1,4 +1,5 @@ from abc import abstractmethod +import re class TokenizerError: index: int @@ -39,6 +40,7 @@ class Tokenizer: return self.str[token.span[0]:token.span[1]] def parse_next_tokens(self): + print(self.str[self.index:]) start = self.index tokens = [] while self.has_next(): @@ -48,9 +50,18 @@ class Tokenizer: tokens.append(DotToken.parse(self)) elif ArgsToken.can_parse(self): tokens.append(ArgsToken.parse(self)) + elif StringLiteralToken.can_parse(self): + tokens.append(StringLiteralToken.parse(self)) else: break + if len(tokens) == 0: + self.error('Expected a token') + return None + + if len(tokens) == 1: + return tokens[0] + return GroupToken((start, self.index), tokens) class Token: @@ -75,7 +86,7 @@ class WordToken(Token): @staticmethod def can_parse(tokenizer: Tokenizer): - return tokenizer.peek().fullmatch(r'[!#\w@]') + return re.fullmatch(r'[!#\w@]', tokenizer.peek()) @staticmethod def parse(tokenizer: Tokenizer): @@ -129,7 +140,7 @@ class NumberLiteralToken(Token): @staticmethod def can_parse(tokenizer: Tokenizer): - return tokenizer.peek().fullmatch(r'[-\d\.]') + return re.fullmatch(r'[-\d\.]', tokenizer.peek()) @staticmethod def parse(tokenizer: Tokenizer): @@ -193,6 +204,7 @@ class ArgsToken(Token): elif tokenizer.peek() == ',': tokenizer.eat() else: + tokenizer.eat() tokens.append(tokenizer.parse_next_tokens()) return ArgsToken((start, tokenizer.index), tokens) \ No newline at end of file diff --git a/files/helpers/regex.py b/files/helpers/regex.py index d0b0f16ea..f754ce7b9 100644 --- a/files/helpers/regex.py +++ b/files/helpers/regex.py @@ -52,7 +52,7 @@ mute_regex = re.compile("\/mute @?([\w-]{1,30}) ([0-9]+)", flags=re.A|re.I) emoji_regex = re.compile(f"
<p>
\s*(:[!#@\w\-]{{1,72}}:\s*)+<\/p>", flags=re.A) emoji_regex2 = re.compile(f'(?|[^`]*`))', flags=re.A) -marseyfx_emoji_regex = re.compile(':[^\s].{0,98}?[^\\]:', flags=re.A) +marseyfx_emoji_regex = re.compile(':[^\s].{0,98}?[^\\\\]:', flags=re.A) snappy_url_regex = re.compile('(.+?)<\/a>', flags=re.A) diff --git a/files/helpers/sanitize.py b/files/helpers/sanitize.py index 044ecea55..eb2b749b3 100644 --- a/files/helpers/sanitize.py +++ b/files/helpers/sanitize.py @@ -279,7 +279,7 @@ def render_emojis(markup: str): for emoji_match in marseyfx_emoji_regex.finditer(markup): emoji_str = emoji_match.group()[1:-1] # Cut off colons - success, emoji = parse_emoji(emoji_str) + success, emoji, _ = parse_emoji(emoji_str) if success: emojis_used.add(emoji.name) emoji_html = str(emoji.create_el()) @@ -549,8 +549,6 @@ def sanitize(sanitized, golden=True, limit_pings=0, showmore=False, count_emojis sanitized = spoiler_regex.sub(r'\1', sanitized) - santiized, emojis_used = render_emojis(sanitized) - sanitized = sanitized.replace('&','&') sanitized = video_sub_regex.sub(r'

', sanitized) @@ -576,6 +574,8 @@ def sanitize(sanitized, golden=True, limit_pings=0, showmore=False, count_emojis parse_email=False, url_re=url_re)] ).clean(sanitized) + sanitized, emojis_used = render_emojis(sanitized) + #doing this here cuz of the linkifyfilter right above it (therefore unifying all link processing logic) soup = BeautifulSoup(sanitized, 'lxml') -- 2.34.1 From 33163d2e5e1ab7a4eb7e7efb4cc0da039c02c304 Mon Sep 17 00:00:00 2001 From: transbitch <> Date: Mon, 18 Sep 2023 11:05:28 -0400 Subject: [PATCH 5/8] some updates xd --- files/helpers/marseyfx/modifiers.py | 100 ++++++++++++++++++++++++---- files/helpers/marseyfx/parser.py | 4 +- files/helpers/marseyfx/tokenizer.py | 12 ++-- 3 files changed, 93 insertions(+), 23 deletions(-) diff --git a/files/helpers/marseyfx/modifiers.py b/files/helpers/marseyfx/modifiers.py index e8d0623de..bb3412f51 100644 --- a/files/helpers/marseyfx/modifiers.py +++ b/files/helpers/marseyfx/modifiers.py @@ -1,7 +1,8 @@ import re from bs4 import BeautifulSoup, Tag from files.helpers.config.const import SITE_FULL_IMAGES -from files.helpers.marseyfx.tokenizer import StringLiteralToken, Token +from files.helpers.marseyfx.tokenizer import GroupToken, NumberLiteralToken, StringLiteralToken, Token, Tokenizer +import files.helpers.marseyfx.parser as parser modifier_whitelist = [] @@ -18,20 +19,41 @@ def modifier(fn): def wrapper(*args, **kwargs): slf = args[0] - slf.el = slf.el.wrap(slf.soup.new_tag('div', attrs={'class': f'marseyfx-modifier marseyfx-modifier-{fn.__name__}'})) + slf.child = slf.container + slf.container = slf.child.wrap(slf.soup.new_tag('div', attrs={'class': f'marseyfx-modifier marseyfx-modifier-{fn.__name__}'})) + return fn(*args, **kwargs) + return wrapper + +def heavy(fn): + def wrapper(*args, **kwargs): + slf = args[0] + slf.heavy_count += 1 return fn(*args, **kwargs) return wrapper class Modified: soup: BeautifulSoup - el: Tag #BeautifulSoup element + container: Tag + child: Tag + tokenizer: Tokenizer + heavy_count = 0 - def __init__(self, el): + def __init__(self, el, tokenizer): self.soup = BeautifulSoup() - self.el = el + self.container = el + self.tokenizer = tokenizer def add_class(self, class_: str): - self.el.attrs['class'].append(' ' + class_) + if not 'class' in self.container.attrs: + self.container.attrs['class'] = '' + else: + self.container.attrs['class'].append(' ' + class_) + + def add_child_class(self, class_: str): + if not 'class' in self.child.attrs: + self.child.attrs['class'] = '' + else: + self.child.attrs['class'].append(' ' + class_) def apply_modifiers(self, modifiers: list[Modifier]): for modifier in modifiers: @@ -40,7 +62,7 @@ class Modified: # Using this instead of throwing everything in a string and then parsing it helps # mitigate the risk of XSS attacks - def image(self, name: str): + def image_href(self, name: str): image = self.soup.new_tag( 'img', loading='lazy', @@ -57,10 +79,16 @@ class Modified: return container def underlay(self, underlay: Tag): - self.el.insert(0, underlay) + self.container.insert(0, underlay) def overlay(self, overlay: Tag): - self.el.append(overlay) + self.container.append(overlay) + + def add_style(self, style: str): + if 'style' in self.container.attrs: + style = self.container.attrs['style'] + style + + self.container.attrs['style'] = style @modifier def pat(self): @@ -85,7 +113,7 @@ class Modified: return self.overlay(self.image('says')) - self.el.append(self.soup.new_tag( + self.container.append(self.soup.new_tag( 'span', string=msg.value, attrs={'class': 
'marseyfx-modifier-says-text'} @@ -93,7 +121,7 @@ class Modified: @modifier def fallover(self): - self.el = self.el.wrap(self.soup.new_tag( + self.container = self.container.wrap(self.soup.new_tag( 'div', attrs={'class': 'marseyfx-modifier-fallover-container'} )) @@ -104,9 +132,9 @@ class Modified: print(f'Evil transform detected: {transformstyle.value}') return - self.el.attrs['style'] = f'transform: {transformstyle.value};' - + self.add_style(f'transform: {transformstyle.value};') + @heavy @modifier def enraged(self): self.underlay(self.soup.new_tag( @@ -114,10 +142,54 @@ class Modified: attrs={'class': 'marseyfx-enraged-underlay'} )) + @heavy @modifier def corrupted(self): pass + @heavy @modifier def wavy(self): - self.el.wrap(self.soup.new_tag('svg')) \ No newline at end of file + self.container.wrap(self.soup.new_tag('svg')) + + @modifier + def toptext(self, text: StringLiteralToken): + if not isinstance(text, StringLiteralToken): + return + + self.overlay(self.soup.new_tag( + 'span', + string=text.value, + attrs={'class': 'marseyfx-modifier-toptext-text'} + )) + + @modifier + def bottomtext(self, text: StringLiteralToken): + if not isinstance(text, StringLiteralToken): + return + + self.overlay(self.soup.new_tag( + 'span', + string=text.value, + attrs={'class': 'marseyfx-modifier-bottomtext-text'} + )) + + @modifier + def spin(self, speed: NumberLiteralToken): + self.add_style('--marseyfx-spin-speed: ' + speed.value + ';') + + @modifier + def triumphs(self, other: GroupToken): + other_emoji = parser.parse_from_token(self.tokenizer, other) + + if other_emoji is None: + return + + self.add_child_class('marseyfx-modifier-triumphs-self') + + other = other_emoji.create_el().wrap( + self.soup.new_tag('div', attrs={'class': 'marseyfx-modifier-triumphs-other'}) + ) + self.underlay(other) + + \ No newline at end of file diff --git a/files/helpers/marseyfx/parser.py b/files/helpers/marseyfx/parser.py index b567fb44c..985d612e5 100644 --- a/files/helpers/marseyfx/parser.py +++ b/files/helpers/marseyfx/parser.py @@ -53,7 +53,7 @@ class Emoji: if (self.is_flipped): container['class'].append(' marseyfx-flipped') - return mod.el.wrap(container) + return mod.container.wrap(container) def parse_emoji(str: str): tokenizer = Tokenizer(str) @@ -101,7 +101,7 @@ def parse_from_token(tokenizer: Tokenizer, token: GroupToken): i += 2 else: args = token.children[i + 2] - modifiers.append(Modifier(modifier.value, args.children)) + modifiers.append(Modifier(modifier.value, *args.children)) i += 3 return Emoji(emoji.value, modifiers, token) \ No newline at end of file diff --git a/files/helpers/marseyfx/tokenizer.py b/files/helpers/marseyfx/tokenizer.py index 82e859b25..bd3db22c3 100644 --- a/files/helpers/marseyfx/tokenizer.py +++ b/files/helpers/marseyfx/tokenizer.py @@ -40,11 +40,12 @@ class Tokenizer: return self.str[token.span[0]:token.span[1]] def parse_next_tokens(self): - print(self.str[self.index:]) start = self.index tokens = [] while self.has_next(): - if WordToken.can_parse(self): + if NumberLiteralToken.can_parse(self): + tokens.append(NumberLiteralToken.parse(self)) + elif WordToken.can_parse(self): tokens.append(WordToken.parse(self)) elif DotToken.can_parse(self): tokens.append(DotToken.parse(self)) @@ -58,9 +59,6 @@ class Tokenizer: if len(tokens) == 0: self.error('Expected a token') return None - - if len(tokens) == 1: - return tokens[0] return GroupToken((start, self.index), tokens) @@ -86,7 +84,7 @@ class WordToken(Token): @staticmethod def can_parse(tokenizer: Tokenizer): - return 
re.fullmatch(r'[!#\w@]', tokenizer.peek()) + return re.fullmatch(r'[!#@a-zA-Z]', tokenizer.peek()) @staticmethod def parse(tokenizer: Tokenizer): @@ -205,6 +203,6 @@ class ArgsToken(Token): tokenizer.eat() else: tokenizer.eat() - tokens.append(tokenizer.parse_next_tokens()) + tokens.extend(tokenizer.parse_next_tokens()) return ArgsToken((start, tokenizer.index), tokens) \ No newline at end of file -- 2.34.1 From 897ee3e1cd224307fbaa7d8a98a4042b57672654 Mon Sep 17 00:00:00 2001 From: transbitch <> Date: Mon, 18 Sep 2023 22:27:27 -0400 Subject: [PATCH 6/8] small changes uwu --- files/helpers/marseyfx/modifiers.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/files/helpers/marseyfx/modifiers.py b/files/helpers/marseyfx/modifiers.py index bb3412f51..68b3e8d9b 100644 --- a/files/helpers/marseyfx/modifiers.py +++ b/files/helpers/marseyfx/modifiers.py @@ -62,7 +62,7 @@ class Modified: # Using this instead of throwing everything in a string and then parsing it helps # mitigate the risk of XSS attacks - def image_href(self, name: str): + def image(self, name: str): image = self.soup.new_tag( 'img', loading='lazy', @@ -139,12 +139,12 @@ class Modified: def enraged(self): self.underlay(self.soup.new_tag( 'div', - attrs={'class': 'marseyfx-enraged-underlay'} + attrs={'class': 'marseyfx-modifier-enraged-underlay'} )) @heavy @modifier - def corrupted(self): + def highcontrast(self): pass @heavy @@ -176,7 +176,7 @@ class Modified: @modifier def spin(self, speed: NumberLiteralToken): - self.add_style('--marseyfx-spin-speed: ' + speed.value + ';') + self.add_style('--marseyfx-spin-peroid-multiplier: ' + (1/speed.value) + ';') @modifier def triumphs(self, other: GroupToken): -- 2.34.1 From 22c9dd19080266c0c505ac5ebdd7f77751f8eb38 Mon Sep 17 00:00:00 2001 From: transbitch <> Date: Sun, 24 Sep 2023 02:02:53 -0400 Subject: [PATCH 7/8] Update MarseyFX --- .gitignore | 4 + files/assets/images/prohibition.svg | 57 +++++ files/assets/images/scope.svg | 347 ++++++++++++++++++++++++++++ files/helpers/marseyfx/modifiers.py | 192 ++++++++++++--- files/helpers/marseyfx/parser.py | 49 ++-- files/helpers/marseyfx/tokenizer.py | 40 +++- files/helpers/regex.py | 2 +- files/helpers/sanitize.py | 74 ++++-- 8 files changed, 687 insertions(+), 78 deletions(-) create mode 100644 files/assets/images/prohibition.svg create mode 100644 files/assets/images/scope.svg diff --git a/.gitignore b/.gitignore index 97a56b27c..4cdeeafe9 100644 --- a/.gitignore +++ b/.gitignore @@ -7,3 +7,7 @@ __pycache__/ emojis.zip emojis_original.zip includes/content-security-policy +includes/headers +nginx.conf +.gitignore +docker-compose.yml \ No newline at end of file diff --git a/files/assets/images/prohibition.svg b/files/assets/images/prohibition.svg new file mode 100644 index 000000000..a9e1c44af --- /dev/null +++ b/files/assets/images/prohibition.svg @@ -0,0 +1,57 @@ + + + + + + + + diff --git a/files/assets/images/scope.svg b/files/assets/images/scope.svg new file mode 100644 index 000000000..35636b9f0 --- /dev/null +++ b/files/assets/images/scope.svg @@ -0,0 +1,347 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/files/helpers/marseyfx/modifiers.py b/files/helpers/marseyfx/modifiers.py index 68b3e8d9b..679b86ef0 100644 --- a/files/helpers/marseyfx/modifiers.py +++ b/files/helpers/marseyfx/modifiers.py @@ -1,4 +1,6 @@ +import copy import re +from typing import Optional from bs4 import BeautifulSoup, Tag from 
files.helpers.config.const import SITE_FULL_IMAGES from files.helpers.marseyfx.tokenizer import GroupToken, NumberLiteralToken, StringLiteralToken, Token, Tokenizer @@ -19,9 +21,14 @@ def modifier(fn): def wrapper(*args, **kwargs): slf = args[0] + ctx = ModifierContextFrame(fn.__name__) + slf.context_frames.insert(0, ctx) slf.child = slf.container - slf.container = slf.child.wrap(slf.soup.new_tag('div', attrs={'class': f'marseyfx-modifier marseyfx-modifier-{fn.__name__}'})) - return fn(*args, **kwargs) + slf.container = slf.child.wrap(slf.soup.new_tag('div', attrs={'class': f'marseyfx-modifier marseyfx-modifier-{ctx.name}'})) + slf.add_child_class(f'marseyfx-modifier-{ctx.name}-self') + res = fn(*args, **kwargs) + slf.context_frames.pop(0) + return res return wrapper def heavy(fn): @@ -31,17 +38,27 @@ def heavy(fn): return fn(*args, **kwargs) return wrapper +class ModifierContextFrame: + name: str + def __init__(self, name: str): + self.name = name + class Modified: soup: BeautifulSoup container: Tag child: Tag tokenizer: Tokenizer heavy_count = 0 + context_frames: list[ModifierContextFrame] def __init__(self, el, tokenizer): self.soup = BeautifulSoup() self.container = el self.tokenizer = tokenizer + self.context_frames = [] + + def ctx(self): + return self.context_frames[0] if len(self.context_frames) > 0 else None def add_class(self, class_: str): if not 'class' in self.container.attrs: @@ -58,15 +75,21 @@ class Modified: def apply_modifiers(self, modifiers: list[Modifier]): for modifier in modifiers: if modifier.name in modifier_whitelist: - getattr(self, modifier.name)(*modifier.args) + getattr(self, modifier.name)(*map(GroupToken.unwrap, modifier.args)) # Using this instead of throwing everything in a string and then parsing it helps # mitigate the risk of XSS attacks def image(self, name: str): + + filename = name + + if not '.' 
in filename: + filename += '.webp' + image = self.soup.new_tag( 'img', loading='lazy', - src=f'{SITE_FULL_IMAGES}/i/{name}.webp', + src=f'{SITE_FULL_IMAGES}/i/{filename}', attrs={'class': f'marseyfx-image marseyfx-image-{name}'} ) @@ -90,9 +113,39 @@ class Modified: self.container.attrs['style'] = style + def meme_text(self, text: str, class_: Optional[str] = None): + attrs = {} + if class_ is not None: + attrs = {'class': f'marseyfx-memetext-{class_}'} + + tag = self.soup.new_tag( + 'span', + attrs=attrs + ) + + tag.string = text + + self.overlay(tag) + + def create_other(self, other: GroupToken = None): + wrapper = self.soup.new_tag('div', attrs={'class': f'marseyfx-modifier-{self.ctx().name}-other'}) + + if other is None: + return wrapper + + other = other.wrap() + other_emoji = parser.parse_from_token(self.tokenizer, other) + + if other_emoji is None: + return wrapper + + other_emoji.is_primary = False + + return other_emoji.create_el(self.tokenizer).wrap(wrapper) + @modifier def pat(self): - self.overlay(self.image('pat')) + self.overlay(self.image('hand')) @modifier def love(self): @@ -107,18 +160,33 @@ class Modified: def genocide(self): pass + @modifier + def party(self): + pass + @modifier def says(self, msg): if not isinstance(msg, StringLiteralToken): return - self.overlay(self.image('says')) - self.container.append(self.soup.new_tag( - 'span', - string=msg.value, - attrs={'class': 'marseyfx-modifier-says-text'} + container = self.soup.new_tag( + 'div', + attrs={'class': 'marseyfx-modifier-says-container'} + ) + self.container.append(container) + + container.append(self.soup.new_tag( + 'div', + attrs={'class': 'marseyfx-modifier-says-nub'} )) + tag = self.soup.new_tag( + 'span', + attrs={'class': 'marseyfx-modifier-says-text'} + ) + tag.string = msg.value + container.append(tag) + @modifier def fallover(self): self.container = self.container.wrap(self.soup.new_tag( @@ -142,54 +210,106 @@ class Modified: attrs={'class': 'marseyfx-modifier-enraged-underlay'} )) - @heavy @modifier - def highcontrast(self): - pass + def meme(self, toptext: Optional[StringLiteralToken] = None, bottomtext: Optional[StringLiteralToken] = None): + if isinstance(toptext, StringLiteralToken): + self.meme_text(toptext.value, 'toptext') - @heavy - @modifier - def wavy(self): - self.container.wrap(self.soup.new_tag('svg')) + if isinstance(bottomtext, StringLiteralToken): + self.meme_text(bottomtext.value, 'bottomtext') - @modifier - def toptext(self, text: StringLiteralToken): - if not isinstance(text, StringLiteralToken): - return - - self.overlay(self.soup.new_tag( - 'span', - string=text.value, - attrs={'class': 'marseyfx-modifier-toptext-text'} - )) - - @modifier def bottomtext(self, text: StringLiteralToken): if not isinstance(text, StringLiteralToken): return - self.overlay(self.soup.new_tag( + tag = self.soup.new_tag( 'span', - string=text.value, attrs={'class': 'marseyfx-modifier-bottomtext-text'} - )) + ) + + tag.string = text.value + + self.overlay(tag) @modifier - def spin(self, speed: NumberLiteralToken): - self.add_style('--marseyfx-spin-peroid-multiplier: ' + (1/speed.value) + ';') + def spin(self, speed=None): + if not isinstance(speed, NumberLiteralToken): + return + + self.add_style(f'animation-duration: {1/speed.value}s;') @modifier def triumphs(self, other: GroupToken): + other = other.wrap() other_emoji = parser.parse_from_token(self.tokenizer, other) + print(f'Other emoji: {other_emoji} / Token: {other}') if other_emoji is None: return 
self.add_child_class('marseyfx-modifier-triumphs-self') - other = other_emoji.create_el().wrap( + other_emoji.is_primary = False + + other = other_emoji.create_el(self.tokenizer).wrap( self.soup.new_tag('div', attrs={'class': 'marseyfx-modifier-triumphs-other'}) ) self.underlay(other) - \ No newline at end of file + @modifier + def nested(self, inside: GroupToken): + inside = inside.wrap() + inside_emoji = parser.parse_from_token(self.tokenizer, inside) + + if inside_emoji is None: + return + + inside_emoji.is_primary = False + + inside = inside_emoji.create_el(self.tokenizer).wrap( + self.soup.new_tag('div', attrs={'class': 'marseyfx-modifier-nested-other'}) + ) + + self.underlay(inside) + + self.add_child_class('marseyfx-modifier-nested-side') + child = self.child + self.child = child.wrap(self.soup.new_tag('div', attrs={'class': 'marseyfx-modifier-nested-outer-container'})) + other_side = copy.copy(child) + self.child.append(other_side) + + @modifier + def morph(self, other: GroupToken): + self.add_child_class('marseyfx-modifier-morph-self') + + other = other.wrap() + other_emoji = parser.parse_from_token(self.tokenizer, other) + + if other_emoji is None: + return + + other_emoji.is_primary = False + other = other_emoji.create_el(self.tokenizer).wrap( + self.soup.new_tag('div', attrs={'class': 'marseyfx-modifier-morph-other'}) + ) + + self.container.append(other) + + @heavy + @modifier + def bulge(self, strength: NumberLiteralToken = None): + self.child = self.child.wrap(self.soup.new_tag('svg', attrs={'class': 'marseyfx-modifier-bulge-container'})) + + @modifier + def prohibition(self): + self.overlay(self.image('prohibition.svg')) + + @modifier + def snipe(self): + self.overlay(self.image('scope.svg')) + self.add_child_class('marseyfx-modifier-snipe-target') + + @modifier + def fucks(self, other: GroupToken): + other = self.create_other(other) + self.container.append(other) \ No newline at end of file diff --git a/files/helpers/marseyfx/parser.py b/files/helpers/marseyfx/parser.py index 985d612e5..bfd7c0530 100644 --- a/files/helpers/marseyfx/parser.py +++ b/files/helpers/marseyfx/parser.py @@ -2,8 +2,8 @@ from tokenize import Token from bs4 import BeautifulSoup from files.helpers.config.const import SITE_FULL_IMAGES -from files.helpers.marseyfx.tokenizer import ArgsToken, DotToken, GroupToken, Tokenizer, WordToken -from files.helpers.marseyfx.modifiers import Modified, Modifier +from files.helpers.marseyfx.tokenizer import ArgsToken, DotToken, GroupToken, NumberLiteralToken, Tokenizer, WordToken +from files.helpers.marseyfx.modifiers import Modified, Modifier, modifier_whitelist emoji_replacers = { '!': 'is_flipped', @@ -18,8 +18,9 @@ class Emoji: is_flipped = False is_user = False modifiers: list[Modifier] + is_primary = True - def __init__(self, name: str, modifiers, token: Token): + def __init__(self, name: str, modifiers, token: Token, **args): for symbol, value in emoji_replacers.items(): if symbol in name: name = name.replace(symbol, '') @@ -28,25 +29,42 @@ class Emoji: self.name = name self.modifiers = modifiers self.token = token + self.is_primary = args.get('is_primary', True) - def create_el(self): + def create_el(self, tokenizer: Tokenizer): soup = BeautifulSoup() el = soup.new_tag( 'img', loading='lazy', src=f'{SITE_FULL_IMAGES}/e/{self.name}.webp', - attrs={'class': f'marseyfx-emoji marseyfx-image'} + attrs={ + 'class': f'marseyfx-emoji marseyfx-image', + } ) soup.append(el) el = el.wrap( - soup.new_tag('div', attrs={'class': 'marseyfx-emoji-container'}) + 
soup.new_tag('div', attrs={ + 'class': 'marseyfx-emoji-container' + }) ) - mod = Modified(el) + mod = Modified(el, tokenizer) mod.apply_modifiers(self.modifiers) - container = soup.new_tag('div', attrs={'class': 'marseyfx-container'}) + + container_attrs = { + 'class': 'marseyfx-container', + } + + if self.is_primary: + container_attrs |= { + 'data-bs-toggle': 'tooltip', + 'title': tokenizer.str + } + + container = soup.new_tag('div', attrs=container_attrs) + if (self.is_big): container['class'].append(' marseyfx-big') @@ -55,8 +73,7 @@ class Emoji: return mod.container.wrap(container) -def parse_emoji(str: str): - tokenizer = Tokenizer(str) +def parse_emoji(tokenizer: Tokenizer): token = tokenizer.parse_next_tokens() if len(tokenizer.errors) > 0 or token is None: @@ -77,8 +94,8 @@ def parse_from_token(tokenizer: Tokenizer, token: GroupToken): emoji = token.children[0] - if not isinstance(emoji, WordToken): - tokenizer.error('Malformed token -- Expected an emoji (word token)') + if not isinstance(emoji, WordToken) and not isinstance(emoji, NumberLiteralToken): + tokenizer.error('Malformed token -- Expected an emoji (word token) or number literal token') return modifiers = [] @@ -96,12 +113,16 @@ def parse_from_token(tokenizer: Tokenizer, token: GroupToken): tokenizer.error('Malformed token -- Expected a modifier name (word token)') return + if not modifier.value in modifier_whitelist: + tokenizer.error(f'Unknown modifier: {modifier.value}') + return + if not i + 2 < len(token.children) or not isinstance(token.children[i + 2], ArgsToken): modifiers.append(Modifier(modifier.value, [])) i += 2 else: args = token.children[i + 2] - modifiers.append(Modifier(modifier.value, *args.children)) + modifiers.append(Modifier(modifier.value, args.children)) i += 3 - return Emoji(emoji.value, modifiers, token) \ No newline at end of file + return Emoji(tokenizer.str[emoji.span[0]:emoji.span[1]], modifiers, token) \ No newline at end of file diff --git a/files/helpers/marseyfx/tokenizer.py b/files/helpers/marseyfx/tokenizer.py index bd3db22c3..7d44b84bc 100644 --- a/files/helpers/marseyfx/tokenizer.py +++ b/files/helpers/marseyfx/tokenizer.py @@ -5,10 +5,14 @@ class TokenizerError: index: int error: str - def __init__(self, index: int, error: str): + def __init__(self, tokenizer, index: int, error: str): + self.tokenizer = tokenizer self.index = index self.error = error + def __str__(self): + return f'{self.error}\n {self.tokenizer.str}\n {" " * self.index}^' + class Tokenizer: str: str index: int @@ -23,6 +27,9 @@ class Tokenizer: return self.index < len(self.str) def peek(self): + if not self.has_next(): + self.error('Unexpected end of input') + return None return self.str[self.index] def eat(self): @@ -34,7 +41,7 @@ class Tokenizer: self.index -= 1 def error(self, error: str): - self.errors.append(TokenizerError(self.index, error)) + self.errors.append(TokenizerError(self, self.index, error)) def token_to_string(self, token): return self.str[token.span[0]:token.span[1]] @@ -43,7 +50,9 @@ class Tokenizer: start = self.index tokens = [] while self.has_next(): - if NumberLiteralToken.can_parse(self): + if self.peek() == ' ': + self.eat() + elif NumberLiteralToken.can_parse(self): tokens.append(NumberLiteralToken.parse(self)) elif WordToken.can_parse(self): tokens.append(WordToken.parse(self)) @@ -65,6 +74,12 @@ class Tokenizer: class Token: span: tuple[int, int] + def wrap(self): + if isinstance(self, GroupToken): + return self + else: + return GroupToken(self.span, [self]) + @staticmethod @abstractmethod 
def can_parse(tokenizer: Tokenizer) -> bool: @@ -138,6 +153,10 @@ class NumberLiteralToken(Token): @staticmethod def can_parse(tokenizer: Tokenizer): + return re.fullmatch(r'[-\d]', tokenizer.peek()) + + @staticmethod + def can_parse_next(tokenizer: Tokenizer): return re.fullmatch(r'[-\d\.]', tokenizer.peek()) @staticmethod @@ -145,7 +164,7 @@ class NumberLiteralToken(Token): start = tokenizer.index value = '' while tokenizer.has_next(): - if NumberLiteralToken.can_parse(tokenizer): + if NumberLiteralToken.can_parse_next(tokenizer): value += tokenizer.eat() else: break @@ -179,8 +198,16 @@ class GroupToken(Token): def __init__(self, span: tuple[int, int], children: list[Token]): self.children = children + + # this span is probably wrong tbh but idc self.span = span + def unwrap(self): + if len(self.children) == 1: + return self.children[0] + else: + return self + class ArgsToken(Token): children: list[GroupToken] def __init__(self, span: tuple[int, int], children: list[Token]): @@ -196,13 +223,14 @@ class ArgsToken(Token): start = tokenizer.index tokens = [] while tokenizer.has_next(): + if tokenizer.peek() == '(': + tokenizer.eat() if tokenizer.peek() == ')': tokenizer.eat() break elif tokenizer.peek() == ',': tokenizer.eat() else: - tokenizer.eat() - tokens.extend(tokenizer.parse_next_tokens()) + tokens.append(tokenizer.parse_next_tokens()) return ArgsToken((start, tokenizer.index), tokens) \ No newline at end of file diff --git a/files/helpers/regex.py b/files/helpers/regex.py index f754ce7b9..e71e4e9bf 100644 --- a/files/helpers/regex.py +++ b/files/helpers/regex.py @@ -52,7 +52,7 @@ mute_regex = re.compile("\/mute @?([\w-]{1,30}) ([0-9]+)", flags=re.A|re.I) emoji_regex = re.compile(f"
<p>
\s*(:[!#@\w\-]{{1,72}}:\s*)+<\/p>", flags=re.A) emoji_regex2 = re.compile(f'(?|[^`]*`))', flags=re.A) -marseyfx_emoji_regex = re.compile(':[^\s].{0,98}?[^\\\\]:', flags=re.A) +marseyfx_emoji_regex = re.compile(':[\w#!].{0,98}?[^\\\\]:', flags=re.A) snappy_url_regex = re.compile('(.+?)<\/a>', flags=re.A) diff --git a/files/helpers/sanitize.py b/files/helpers/sanitize.py index eb2b749b3..9c5d26b41 100644 --- a/files/helpers/sanitize.py +++ b/files/helpers/sanitize.py @@ -8,6 +8,7 @@ from typing_extensions import deprecated from urllib.parse import parse_qs, urlparse, unquote, ParseResult, urlencode, urlunparse import time from files.helpers.marseyfx.parser import parse_emoji +from files.helpers.marseyfx.tokenizer import Tokenizer from sqlalchemy.sql import func @@ -129,7 +130,7 @@ def build_url_re(tlds, protocols): """ return re.compile( r"""\(*# Match any opening parentheses. - \b(?"]*)? # /path/zz (excluding "unsafe" chars from RFC 1738, @@ -274,18 +275,56 @@ def find_all_emote_endings(word): return endings, word -def render_emojis(markup: str): - emojis_used = set() +class RenderEmojisResult: + emojis_used: set[str] + heavy_count = 0 + tags: list[str] + + def __init__(self): + self.emojis_used = set() + self.tags = [] + + def update(self, other): + self.emojis_used |= other.emojis_used + self.heavy_count += other.heavy_count + self.tags.extend(other.tags) + +def render_emojis(markup: str, **kwargs): + result = RenderEmojisResult() + last_match_end = 0 + + golden = kwargs.get('golden', True) + permit_big = kwargs.get('permit_big', True) for emoji_match in marseyfx_emoji_regex.finditer(markup): - emoji_str = emoji_match.group()[1:-1] # Cut off colons - success, emoji, _ = parse_emoji(emoji_str) - if success: - emojis_used.add(emoji.name) - emoji_html = str(emoji.create_el()) - markup = markup.replace(emoji_match.group(), emoji_html) + previous_text = markup[last_match_end:emoji_match.start()] + if previous_text != '': + result.tags.append(previous_text) + last_match_end = emoji_match.end() - return markup, emojis_used + emoji_str = emoji_match.group()[1:-1] # Cut off colons + + tokenizer = Tokenizer(emoji_str) + success, emoji, _ = parse_emoji(tokenizer) + if success: + result.emojis_used.add(emoji.name) + + if not permit_big: + emoji.is_big = False + + emoji_html = emoji.create_el(tokenizer) + result.tags.append(emoji_html) + + if len(tokenizer.errors) > 0: + soup = BeautifulSoup() + err_tag = soup.new_tag('pre', attrs={'class': 'marseyfx-error'}) + nl = "\n " + err_tag.string = 'MarseyFX error:' + nl + nl.join(map(str,tokenizer.errors)) + result.tags.append(err_tag) + #result.tags.append(f':{emoji_str}:') + + result.tags.append(markup[last_match_end:]) + return result @deprecated("Use the new one") def old_render_emoji(html, regexp, golden, emojis_used, b=False, is_title=False): @@ -554,11 +593,6 @@ def sanitize(sanitized, golden=True, limit_pings=0, showmore=False, count_emojis sanitized = video_sub_regex.sub(r'

', sanitized) sanitized = audio_sub_regex.sub(r'', sanitized) - if count_emojis: - for emoji in g.db.query(Emoji).filter(Emoji.submitter_id==None, Emoji.name.in_(emojis_used)): - emoji.count += 1 - g.db.add(emoji) - sanitized = sanitized.replace('
<p></p>
', '') allowed_css_properties = allowed_styles.copy() @@ -574,9 +608,8 @@ def sanitize(sanitized, golden=True, limit_pings=0, showmore=False, count_emojis parse_email=False, url_re=url_re)] ).clean(sanitized) - sanitized, emojis_used = render_emojis(sanitized) - #doing this here cuz of the linkifyfilter right above it (therefore unifying all link processing logic) + #doing this here cuz of the linkifyfilter right above it (therefore unifying all link processing logic) <-- i have no clue what this means lol soup = BeautifulSoup(sanitized, 'lxml') has_transform = bool(soup.select('[style*=transform i]')) @@ -660,9 +693,6 @@ def sanitize(sanitized, golden=True, limit_pings=0, showmore=False, count_emojis html = f'

{html}

'
 		sanitized = sanitized.replace(i.group(0), html)
 
-	if '<pre>' not in sanitized and blackjack != "rules":
-		sanitized = sanitized.replace('\n','')
-
 	if showmore:
 		# Insert a show more button if the text is too long or has too many paragraphs
 		CHARLIMIT = 3000
@@ -708,7 +738,10 @@ def filter_emojis_only(title, golden=True, count_emojis=False):
 
 	title = remove_cuniform(title)
 
-	title, emojis_used = render_emojis(title) #old_render_emoji(title, emoji_regex2, golden, emojis_used, is_title=True)
+	res = render_emojis(title) #old_render_emoji(title, emoji_regex2, golden, emojis_used, is_title=True)
+
+	title = ''.join(map(str, res.tags))
+	emojis_used = res.emojis_used # the count_emojis block below still expects this name
 
 	if count_emojis:
 		for emoji in g.db.query(Emoji).filter(Emoji.submitter_id==None, Emoji.name.in_(emojis_used)):
-- 
2.34.1

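A usage sketch, not part of the patch series: this is roughly how render_emojis() in sanitize.py drives the MarseyFX pipeline once PATCH 7/8 is applied, assuming the files.helpers.marseyfx package from these patches is importable. Tokenizer, parse_emoji and Emoji.create_el come from the patches above; the emoji name "marsey" and the modifier arguments are illustrative assumptions.

    from files.helpers.marseyfx.tokenizer import Tokenizer
    from files.helpers.marseyfx.parser import parse_emoji

    # The text between the two colons of a match like :marsey.says("hi").spin(2):
    emoji_str = 'marsey.says("hi").spin(2)'

    tokenizer = Tokenizer(emoji_str)
    success, emoji, token = parse_emoji(tokenizer)  # (bool, Emoji or None, GroupToken or None)

    if success:
        # create_el() builds the nested marseyfx-container markup with BeautifulSoup
        html = str(emoji.create_el(tokenizer))
    else:
        # TokenizerError.__str__ renders the input with a caret under the failing index
        print('\n'.join(map(str, tokenizer.errors)))
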

From c5209495fd8de884c3c2f35d76904c2851894c0b Mon Sep 17 00:00:00 2001
From: transbitch <>
Date: Mon, 25 Sep 2023 01:17:09 -0400
Subject: [PATCH 8/8] Add slur/profanity replacements

---
 files/helpers/slurs_and_profanities.py | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/files/helpers/slurs_and_profanities.py b/files/helpers/slurs_and_profanities.py
index 37cd3ef33..7e6c5546e 100644
--- a/files/helpers/slurs_and_profanities.py
+++ b/files/helpers/slurs_and_profanities.py
@@ -11,6 +11,12 @@ def replace_train_html(body):
 	body = body.replace(troon, ':marseytrain2:')
 	return body
 
+trans_valid_copypasta = """You are a real woman. You have no womb, you have no ovaries, you have no eggs, but that doesn't matter. You are a valid human who is trying to feel comfortable in her body.
+All the “validation” you get is pure. Behind your back people love you. Your parents are happy and proud of you, your friends laugh at your jokes behind closed doors, and boys love you, and girls envy you.
+Men absolutely love you. Trans folk who “pass” look ordinary and natural to a man. Your bone structure does not matter. Estradiol widens the hips.
+You will be happy. You will smile every single morning and tell yourself it’s going to be ok, and deep inside you feel the euphoria creeping up like a weed. It is what defines you, not the transphobes.
+Eventually, it’ll be perfect for you - you’ll come out, start HRT, get top surgery, and finally be your ideal self. Your parents will find you, happy and relieved that they finally have a happy daughter. They’ll congratulate you on your hard journey, and every passerby for the rest of eternity will know a woman is what you are."""
+
 SLURS = {
 	"tranny": tranny,
 	"trannie": trannie,
@@ -27,12 +33,16 @@ SLURS = {
 	"(?