From 22c9dd19080266c0c505ac5ebdd7f77751f8eb38 Mon Sep 17 00:00:00 2001 From: transbitch <> Date: Sun, 24 Sep 2023 02:02:53 -0400 Subject: [PATCH] Update MarseyFX --- .gitignore | 4 + files/assets/images/prohibition.svg | 57 +++++ files/assets/images/scope.svg | 347 ++++++++++++++++++++++++++++ files/helpers/marseyfx/modifiers.py | 192 ++++++++++++--- files/helpers/marseyfx/parser.py | 49 ++-- files/helpers/marseyfx/tokenizer.py | 40 +++- files/helpers/regex.py | 2 +- files/helpers/sanitize.py | 74 ++++-- 8 files changed, 687 insertions(+), 78 deletions(-) create mode 100644 files/assets/images/prohibition.svg create mode 100644 files/assets/images/scope.svg diff --git a/.gitignore b/.gitignore index 97a56b27c..4cdeeafe9 100644 --- a/.gitignore +++ b/.gitignore @@ -7,3 +7,7 @@ __pycache__/ emojis.zip emojis_original.zip includes/content-security-policy +includes/headers +nginx.conf +.gitignore +docker-compose.yml \ No newline at end of file diff --git a/files/assets/images/prohibition.svg b/files/assets/images/prohibition.svg new file mode 100644 index 000000000..a9e1c44af --- /dev/null +++ b/files/assets/images/prohibition.svg @@ -0,0 +1,57 @@ + + + + + + + + diff --git a/files/assets/images/scope.svg b/files/assets/images/scope.svg new file mode 100644 index 000000000..35636b9f0 --- /dev/null +++ b/files/assets/images/scope.svg @@ -0,0 +1,347 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/files/helpers/marseyfx/modifiers.py b/files/helpers/marseyfx/modifiers.py index 68b3e8d9b..679b86ef0 100644 --- a/files/helpers/marseyfx/modifiers.py +++ b/files/helpers/marseyfx/modifiers.py @@ -1,4 +1,6 @@ +import copy import re +from typing import Optional from bs4 import BeautifulSoup, Tag from files.helpers.config.const import SITE_FULL_IMAGES from files.helpers.marseyfx.tokenizer import GroupToken, NumberLiteralToken, StringLiteralToken, Token, Tokenizer @@ -19,9 +21,14 @@ 
def modifier(fn): def wrapper(*args, **kwargs): slf = args[0] + ctx = ModifierContextFrame(fn.__name__) + slf.context_frames.insert(0, ctx) slf.child = slf.container - slf.container = slf.child.wrap(slf.soup.new_tag('div', attrs={'class': f'marseyfx-modifier marseyfx-modifier-{fn.__name__}'})) - return fn(*args, **kwargs) + slf.container = slf.child.wrap(slf.soup.new_tag('div', attrs={'class': f'marseyfx-modifier marseyfx-modifier-{ctx.name}'})) + slf.add_child_class(f'marseyfx-modifier-{ctx.name}-self') + res = fn(*args, **kwargs) + slf.context_frames.pop(0) + return res return wrapper def heavy(fn): @@ -31,17 +38,27 @@ def heavy(fn): return fn(*args, **kwargs) return wrapper +class ModifierContextFrame: + name: str + def __init__(self, name: str): + self.name = name + class Modified: soup: BeautifulSoup container: Tag child: Tag tokenizer: Tokenizer heavy_count = 0 + context_frames: list[ModifierContextFrame] def __init__(self, el, tokenizer): self.soup = BeautifulSoup() self.container = el self.tokenizer = tokenizer + self.context_frames = [] + + def ctx(self): + return self.context_frames[0] if len(self.context_frames) > 0 else None def add_class(self, class_: str): if not 'class' in self.container.attrs: @@ -58,15 +75,21 @@ class Modified: def apply_modifiers(self, modifiers: list[Modifier]): for modifier in modifiers: if modifier.name in modifier_whitelist: - getattr(self, modifier.name)(*modifier.args) + getattr(self, modifier.name)(*map(GroupToken.unwrap, modifier.args)) # Using this instead of throwing everything in a string and then parsing it helps # mitigate the risk of XSS attacks def image(self, name: str): + + filename = name + + if not '.' 
in filename: + filename += '.webp' + image = self.soup.new_tag( 'img', loading='lazy', - src=f'{SITE_FULL_IMAGES}/i/{name}.webp', + src=f'{SITE_FULL_IMAGES}/i/{filename}', attrs={'class': f'marseyfx-image marseyfx-image-{name}'} ) @@ -90,9 +113,39 @@ class Modified: self.container.attrs['style'] = style + def meme_text(self, text: str, class_: Optional[str] = None): + attrs = {} + if class_ is not None: + attrs = {'class': f'marseyfx-memetext-{class_}'} + + tag = self.soup.new_tag( + 'span', + attrs=attrs + ) + + tag.string = text + + self.overlay(tag) + + def create_other(self, other: GroupToken = None): + wrapper = self.soup.new_tag('div', attrs={'class': f'marseyfx-modifier-{self.ctx().name}-other'}) + + if other is None: + return wrapper + + other = other.wrap() + other_emoji = parser.parse_from_token(self.tokenizer, other) + + if other_emoji is None: + return wrapper + + other_emoji.is_primary = False + + return other_emoji.create_el(self.tokenizer).wrap(wrapper) + @modifier def pat(self): - self.overlay(self.image('pat')) + self.overlay(self.image('hand')) @modifier def love(self): @@ -107,18 +160,33 @@ class Modified: def genocide(self): pass + @modifier + def party(self): + pass + @modifier def says(self, msg): if not isinstance(msg, StringLiteralToken): return - self.overlay(self.image('says')) - self.container.append(self.soup.new_tag( - 'span', - string=msg.value, - attrs={'class': 'marseyfx-modifier-says-text'} + container = self.soup.new_tag( + 'div', + attrs={'class': 'marseyfx-modifier-says-container'} + ) + self.container.append(container) + + container.append(self.soup.new_tag( + 'div', + attrs={'class': 'marseyfx-modifier-says-nub'} )) + tag = self.soup.new_tag( + 'span', + attrs={'class': 'marseyfx-modifier-says-text'} + ) + tag.string = msg.value + container.append(tag) + @modifier def fallover(self): self.container = self.container.wrap(self.soup.new_tag( @@ -142,54 +210,106 @@ class Modified: attrs={'class': 
'marseyfx-modifier-enraged-underlay'} )) - @heavy @modifier - def highcontrast(self): - pass + def meme(self, toptext: Optional[StringLiteralToken] = None, bottomtext: Optional[StringLiteralToken] = None): + if isinstance(toptext, StringLiteralToken): + self.meme_text(toptext.value, 'toptext') - @heavy - @modifier - def wavy(self): - self.container.wrap(self.soup.new_tag('svg')) + if isinstance(bottomtext, StringLiteralToken): + self.meme_text(bottomtext.value, 'bottomtext') - @modifier - def toptext(self, text: StringLiteralToken): - if not isinstance(text, StringLiteralToken): - return - - self.overlay(self.soup.new_tag( - 'span', - string=text.value, - attrs={'class': 'marseyfx-modifier-toptext-text'} - )) - - @modifier def bottomtext(self, text: StringLiteralToken): if not isinstance(text, StringLiteralToken): return - self.overlay(self.soup.new_tag( + tag = self.soup.new_tag( 'span', - string=text.value, attrs={'class': 'marseyfx-modifier-bottomtext-text'} - )) + ) + + tag.string = text.value + + self.overlay(tag) @modifier - def spin(self, speed: NumberLiteralToken): - self.add_style('--marseyfx-spin-peroid-multiplier: ' + (1/speed.value) + ';') + def spin(self, speed=None): + if not isinstance(speed, NumberLiteralToken): + return + + self.add_style(f'animation-duration: {1/speed.value}s;') @modifier def triumphs(self, other: GroupToken): + other = other.wrap() other_emoji = parser.parse_from_token(self.tokenizer, other) + print(f'Other emoji: {other_emoji} / Token: {other}') if other_emoji is None: return self.add_child_class('marseyfx-modifier-triumphs-self') - other = other_emoji.create_el().wrap( + other_emoji.is_primary = False + + other = other_emoji.create_el(self.tokenizer).wrap( self.soup.new_tag('div', attrs={'class': 'marseyfx-modifier-triumphs-other'}) ) self.underlay(other) - \ No newline at end of file + @modifier + def nested(self, inside: GroupToken): + inside = inside.wrap() + inside_emoji = parser.parse_from_token(self.tokenizer, inside) + + 
if inside_emoji is None: + return + + inside_emoji.is_primary = False + + inside = inside_emoji.create_el(self.tokenizer).wrap( + self.soup.new_tag('div', attrs={'class': 'marseyfx-modifier-nested-other'}) + ) + + self.underlay(inside) + + self.add_child_class('marseyfx-modifier-nested-side') + child = self.child + self.child = child.wrap(self.soup.new_tag('div', attrs={'class': 'marseyfx-modifier-nested-outer-container'})) + other_side = copy.copy(child) + self.child.append(other_side) + + @modifier + def morph(self, other: GroupToken): + self.add_child_class('marseyfx-modifier-morph-self') + + other = other.wrap() + other_emoji = parser.parse_from_token(self.tokenizer, other) + + if other_emoji is None: + return + + other_emoji.is_primary = False + other = other_emoji.create_el(self.tokenizer).wrap( + self.soup.new_tag('div', attrs={'class': 'marseyfx-modifier-morph-other'}) + ) + + self.container.append(other) + + @heavy + @modifier + def bulge(self, strength: NumberLiteralToken = None): + self.child = self.child.wrap(self.soup.new_tag('svg', attrs={'class': 'marseyfx-modifier-bulge-container'})) + + @modifier + def prohibition(self): + self.overlay(self.image('prohibition.svg')) + + @modifier + def snipe(self): + self.overlay(self.image('scope.svg')) + self.add_child_class('marseyfx-modifier-snipe-target') + + @modifier + def fucks(self, other: GroupToken): + other = self.create_other(other) + self.container.append(other) \ No newline at end of file diff --git a/files/helpers/marseyfx/parser.py b/files/helpers/marseyfx/parser.py index 985d612e5..bfd7c0530 100644 --- a/files/helpers/marseyfx/parser.py +++ b/files/helpers/marseyfx/parser.py @@ -2,8 +2,8 @@ from tokenize import Token from bs4 import BeautifulSoup from files.helpers.config.const import SITE_FULL_IMAGES -from files.helpers.marseyfx.tokenizer import ArgsToken, DotToken, GroupToken, Tokenizer, WordToken -from files.helpers.marseyfx.modifiers import Modified, Modifier +from 
files.helpers.marseyfx.tokenizer import ArgsToken, DotToken, GroupToken, NumberLiteralToken, Tokenizer, WordToken +from files.helpers.marseyfx.modifiers import Modified, Modifier, modifier_whitelist emoji_replacers = { '!': 'is_flipped', @@ -18,8 +18,9 @@ class Emoji: is_flipped = False is_user = False modifiers: list[Modifier] + is_primary = True - def __init__(self, name: str, modifiers, token: Token): + def __init__(self, name: str, modifiers, token: Token, **args): for symbol, value in emoji_replacers.items(): if symbol in name: name = name.replace(symbol, '') @@ -28,25 +29,42 @@ class Emoji: self.name = name self.modifiers = modifiers self.token = token + self.is_primary = args.get('is_primary', True) - def create_el(self): + def create_el(self, tokenizer: Tokenizer): soup = BeautifulSoup() el = soup.new_tag( 'img', loading='lazy', src=f'{SITE_FULL_IMAGES}/e/{self.name}.webp', - attrs={'class': f'marseyfx-emoji marseyfx-image'} + attrs={ + 'class': f'marseyfx-emoji marseyfx-image', + } ) soup.append(el) el = el.wrap( - soup.new_tag('div', attrs={'class': 'marseyfx-emoji-container'}) + soup.new_tag('div', attrs={ + 'class': 'marseyfx-emoji-container' + }) ) - mod = Modified(el) + mod = Modified(el, tokenizer) mod.apply_modifiers(self.modifiers) - container = soup.new_tag('div', attrs={'class': 'marseyfx-container'}) + + container_attrs = { + 'class': 'marseyfx-container', + } + + if self.is_primary: + container_attrs |= { + 'data-bs-toggle': 'tooltip', + 'title': tokenizer.str + } + + container = soup.new_tag('div', attrs=container_attrs) + if (self.is_big): container['class'].append(' marseyfx-big') @@ -55,8 +73,7 @@ class Emoji: return mod.container.wrap(container) -def parse_emoji(str: str): - tokenizer = Tokenizer(str) +def parse_emoji(tokenizer: Tokenizer): token = tokenizer.parse_next_tokens() if len(tokenizer.errors) > 0 or token is None: @@ -77,8 +94,8 @@ def parse_from_token(tokenizer: Tokenizer, token: GroupToken): emoji = token.children[0] - if not 
isinstance(emoji, WordToken): - tokenizer.error('Malformed token -- Expected an emoji (word token)') + if not isinstance(emoji, WordToken) and not isinstance(emoji, NumberLiteralToken): + tokenizer.error('Malformed token -- Expected an emoji (word token) or number literal token') return modifiers = [] @@ -96,12 +113,16 @@ def parse_from_token(tokenizer: Tokenizer, token: GroupToken): tokenizer.error('Malformed token -- Expected a modifier name (word token)') return + if not modifier.value in modifier_whitelist: + tokenizer.error(f'Unknown modifier: {modifier.value}') + return + if not i + 2 < len(token.children) or not isinstance(token.children[i + 2], ArgsToken): modifiers.append(Modifier(modifier.value, [])) i += 2 else: args = token.children[i + 2] - modifiers.append(Modifier(modifier.value, *args.children)) + modifiers.append(Modifier(modifier.value, args.children)) i += 3 - return Emoji(emoji.value, modifiers, token) \ No newline at end of file + return Emoji(tokenizer.str[emoji.span[0]:emoji.span[1]], modifiers, token) \ No newline at end of file diff --git a/files/helpers/marseyfx/tokenizer.py b/files/helpers/marseyfx/tokenizer.py index bd3db22c3..7d44b84bc 100644 --- a/files/helpers/marseyfx/tokenizer.py +++ b/files/helpers/marseyfx/tokenizer.py @@ -5,10 +5,14 @@ class TokenizerError: index: int error: str - def __init__(self, index: int, error: str): + def __init__(self, tokenizer, index: int, error: str): + self.tokenizer = tokenizer self.index = index self.error = error + def __str__(self): + return f'{self.error}\n {self.tokenizer.str}\n {" " * self.index}^' + class Tokenizer: str: str index: int @@ -23,6 +27,9 @@ class Tokenizer: return self.index < len(self.str) def peek(self): + if not self.has_next(): + self.error('Unexpected end of input') + return None return self.str[self.index] def eat(self): @@ -34,7 +41,7 @@ class Tokenizer: self.index -= 1 def error(self, error: str): - self.errors.append(TokenizerError(self.index, error)) + 
self.errors.append(TokenizerError(self, self.index, error)) def token_to_string(self, token): return self.str[token.span[0]:token.span[1]] @@ -43,7 +50,9 @@ class Tokenizer: start = self.index tokens = [] while self.has_next(): - if NumberLiteralToken.can_parse(self): + if self.peek() == ' ': + self.eat() + elif NumberLiteralToken.can_parse(self): tokens.append(NumberLiteralToken.parse(self)) elif WordToken.can_parse(self): tokens.append(WordToken.parse(self)) @@ -65,6 +74,12 @@ class Tokenizer: class Token: span: tuple[int, int] + def wrap(self): + if isinstance(self, GroupToken): + return self + else: + return GroupToken(self.span, [self]) + @staticmethod @abstractmethod def can_parse(tokenizer: Tokenizer) -> bool: @@ -138,6 +153,10 @@ class NumberLiteralToken(Token): @staticmethod def can_parse(tokenizer: Tokenizer): + return re.fullmatch(r'[-\d]', tokenizer.peek()) + + @staticmethod + def can_parse_next(tokenizer: Tokenizer): return re.fullmatch(r'[-\d\.]', tokenizer.peek()) @staticmethod @@ -145,7 +164,7 @@ class NumberLiteralToken(Token): start = tokenizer.index value = '' while tokenizer.has_next(): - if NumberLiteralToken.can_parse(tokenizer): + if NumberLiteralToken.can_parse_next(tokenizer): value += tokenizer.eat() else: break @@ -179,8 +198,16 @@ class GroupToken(Token): def __init__(self, span: tuple[int, int], children: list[Token]): self.children = children + + # this span is probably wrong tbh but idc self.span = span + def unwrap(self): + if len(self.children) == 1: + return self.children[0] + else: + return self + class ArgsToken(Token): children: list[GroupToken] def __init__(self, span: tuple[int, int], children: list[Token]): @@ -196,13 +223,14 @@ class ArgsToken(Token): start = tokenizer.index tokens = [] while tokenizer.has_next(): + if tokenizer.peek() == '(': + tokenizer.eat() if tokenizer.peek() == ')': tokenizer.eat() break elif tokenizer.peek() == ',': tokenizer.eat() else: - tokenizer.eat() - tokens.extend(tokenizer.parse_next_tokens()) 
+ tokens.append(tokenizer.parse_next_tokens()) return ArgsToken((start, tokenizer.index), tokens) \ No newline at end of file diff --git a/files/helpers/regex.py b/files/helpers/regex.py index f754ce7b9..e71e4e9bf 100644 --- a/files/helpers/regex.py +++ b/files/helpers/regex.py @@ -52,7 +52,7 @@ mute_regex = re.compile("\/mute @?([\w-]{1,30}) ([0-9]+)", flags=re.A|re.I) emoji_regex = re.compile(f"

\s*(:[!#@\w\-]{{1,72}}:\s*)+<\/p>", flags=re.A) emoji_regex2 = re.compile(f'(?|[^`]*`))', flags=re.A) -marseyfx_emoji_regex = re.compile(':[^\s].{0,98}?[^\\\\]:', flags=re.A) +marseyfx_emoji_regex = re.compile(':[\w#!].{0,98}?[^\\\\]:', flags=re.A) snappy_url_regex = re.compile('(.+?)<\/a>', flags=re.A) diff --git a/files/helpers/sanitize.py b/files/helpers/sanitize.py index eb2b749b3..9c5d26b41 100644 --- a/files/helpers/sanitize.py +++ b/files/helpers/sanitize.py @@ -8,6 +8,7 @@ from typing_extensions import deprecated from urllib.parse import parse_qs, urlparse, unquote, ParseResult, urlencode, urlunparse import time from files.helpers.marseyfx.parser import parse_emoji +from files.helpers.marseyfx.tokenizer import Tokenizer from sqlalchemy.sql import func @@ -129,7 +130,7 @@ def build_url_re(tlds, protocols): """ return re.compile( r"""\(*# Match any opening parentheses. - \b(?"]*)? # /path/zz (excluding "unsafe" chars from RFC 1738, @@ -274,18 +275,56 @@ def find_all_emote_endings(word): return endings, word -def render_emojis(markup: str): - emojis_used = set() +class RenderEmojisResult: + emojis_used: set[str] + heavy_count = 0 + tags: list[str] + + def __init__(self): + self.emojis_used = set() + self.tags = [] + + def update(self, other): + self.emojis_used |= other.emojis_used + self.heavy_count += other.heavy_count + self.tags.extend(other.tags) + +def render_emojis(markup: str, **kwargs): + result = RenderEmojisResult() + last_match_end = 0 + + golden = kwargs.get('golden', True) + permit_big = kwargs.get('permit_big', True) for emoji_match in marseyfx_emoji_regex.finditer(markup): - emoji_str = emoji_match.group()[1:-1] # Cut off colons - success, emoji, _ = parse_emoji(emoji_str) - if success: - emojis_used.add(emoji.name) - emoji_html = str(emoji.create_el()) - markup = markup.replace(emoji_match.group(), emoji_html) + previous_text = markup[last_match_end:emoji_match.start()] + if previous_text != '': + result.tags.append(previous_text) + 
last_match_end = emoji_match.end() - return markup, emojis_used + emoji_str = emoji_match.group()[1:-1] # Cut off colons + + tokenizer = Tokenizer(emoji_str) + success, emoji, _ = parse_emoji(tokenizer) + if success: + result.emojis_used.add(emoji.name) + + if not permit_big: + emoji.is_big = False + + emoji_html = emoji.create_el(tokenizer) + result.tags.append(emoji_html) + + if len(tokenizer.errors) > 0: + soup = BeautifulSoup() + err_tag = soup.new_tag('pre', attrs={'class': 'marseyfx-error'}) + nl = "\n " + err_tag.string = 'MarseyFX error:' + nl + nl.join(map(str,tokenizer.errors)) + result.tags.append(err_tag) + #result.tags.append(f':{emoji_str}:') + + result.tags.append(markup[last_match_end:]) + return result @deprecated("Use the new one") def old_render_emoji(html, regexp, golden, emojis_used, b=False, is_title=False): @@ -554,11 +593,6 @@ def sanitize(sanitized, golden=True, limit_pings=0, showmore=False, count_emojis sanitized = video_sub_regex.sub(r'

', sanitized) sanitized = audio_sub_regex.sub(r'', sanitized) - if count_emojis: - for emoji in g.db.query(Emoji).filter(Emoji.submitter_id==None, Emoji.name.in_(emojis_used)): - emoji.count += 1 - g.db.add(emoji) - sanitized = sanitized.replace('

', '') allowed_css_properties = allowed_styles.copy() @@ -574,9 +608,8 @@ def sanitize(sanitized, golden=True, limit_pings=0, showmore=False, count_emojis parse_email=False, url_re=url_re)] ).clean(sanitized) - sanitized, emojis_used = render_emojis(sanitized) - #doing this here cuz of the linkifyfilter right above it (therefore unifying all link processing logic) + #doing this here cuz of the linkifyfilter right above it (therefore unifying all link processing logic) <-- i have no clue what this means lol soup = BeautifulSoup(sanitized, 'lxml') has_transform = bool(soup.select('[style*=transform i]')) @@ -660,9 +693,6 @@ def sanitize(sanitized, golden=True, limit_pings=0, showmore=False, count_emojis html = f'

{html}

' sanitized = sanitized.replace(i.group(0), html) - if '
' not in sanitized and blackjack != "rules":
-		sanitized = sanitized.replace('\n','')
-
 	if showmore:
 		# Insert a show more button if the text is too long or has too many paragraphs
 		CHARLIMIT = 3000
@@ -708,7 +738,9 @@ def filter_emojis_only(title, golden=True, count_emojis=False):
 
 	title = remove_cuniform(title)
 
-	title, emojis_used = render_emojis(title) #old_render_emoji(title, emoji_regex2, golden, emojis_used, is_title=True)
+	res = render_emojis(title) #old_render_emoji(title, emoji_regex2, golden, emojis_used, is_title=True)
+	emojis_used = res.emojis_used # render_emojis now returns a result object; the count_emojis query below still reads this name
+	title = ''.join(map(str, res.tags))
 
 	if count_emojis:
 		for emoji in g.db.query(Emoji).filter(Emoji.submitter_id==None, Emoji.name.in_(emojis_used)):