From 182ee41d6aa1877c5494737af347b70540acb873 Mon Sep 17 00:00:00 2001
From: transbitch <>
Date: Sun, 8 Oct 2023 20:49:40 -0400
Subject: [PATCH] Finish up the checklist
---
files/helpers/marseyfx/modifiers.py | 526 ++++++++++++++--------------
files/helpers/marseyfx/parser.py | 231 ++++++------
files/helpers/marseyfx/tokenizer.py | 370 +++++++++----------
files/helpers/sanitize.py | 77 ++--
4 files changed, 623 insertions(+), 581 deletions(-)
diff --git a/files/helpers/marseyfx/modifiers.py b/files/helpers/marseyfx/modifiers.py
index 0bac7a635..faebdff48 100644
--- a/files/helpers/marseyfx/modifiers.py
+++ b/files/helpers/marseyfx/modifiers.py
@@ -9,337 +9,337 @@ import files.helpers.marseyfx.parser as parser
modifier_whitelist = []
class Modifier:
- name: str
- args: list[Token]
+ name: str
+ args: list[Token]
- def __init__(self, name: str, args: list[Token]):
- self.name = name
- self.args = args
+ def __init__(self, name: str, args: list[Token]):
+ self.name = name
+ self.args = args
def modifier(fn):
- modifier_whitelist.append(fn.__name__)
+ modifier_whitelist.append(fn.__name__)
- def wrapper(*args, **kwargs):
- slf = args[0]
- ctx = ModifierContextFrame(fn.__name__)
- slf.context_frames.insert(0, ctx)
- slf.child = slf.container
- slf.container = slf.child.wrap(slf.soup.new_tag('div', attrs={'class': f'marseyfx-modifier marseyfx-modifier-{ctx.name}'}))
- slf.add_child_class(f'marseyfx-modifier-{ctx.name}-self')
- res = fn(*args, **kwargs)
- slf.context_frames.pop(0)
- return res
- return wrapper
+ def wrapper(*args, **kwargs):
+ slf = args[0]
+ ctx = ModifierContextFrame(fn.__name__)
+ slf.context_frames.insert(0, ctx)
+ slf.child = slf.container
+ slf.container = slf.child.wrap(slf.soup.new_tag('div', attrs={'class': f'marseyfx-modifier marseyfx-modifier-{ctx.name}'}))
+ slf.add_child_class(f'marseyfx-modifier-{ctx.name}-self')
+ res = fn(*args, **kwargs)
+ slf.context_frames.pop(0)
+ return res
+ return wrapper
def heavy(fn):
- def wrapper(*args, **kwargs):
- slf = args[0]
- slf.heavy_count += 1
- return fn(*args, **kwargs)
- return wrapper
+ def wrapper(*args, **kwargs):
+ slf = args[0]
+ slf.heavy_count += 1
+ return fn(*args, **kwargs)
+ return wrapper
class ModifierContextFrame:
- name: str
- wrap_depth: int = 0
- def __init__(self, name: str):
- self.name = name
+ name: str
+ wrap_depth: int = 0
+ def __init__(self, name: str):
+ self.name = name
class Modified:
- soup: BeautifulSoup
- container: Tag
- child: Tag
- tokenizer: Tokenizer
- heavy_count = 0
- context_frames: list[ModifierContextFrame]
+ soup: BeautifulSoup
+ container: Tag
+ child: Tag
+ tokenizer: Tokenizer
+ heavy_count = 0
+ context_frames: list[ModifierContextFrame]
- def __init__(self, el, tokenizer):
- self.soup = BeautifulSoup()
- self.container = el
- self.tokenizer = tokenizer
- self.context_frames = []
+ def __init__(self, el, tokenizer):
+ self.soup = BeautifulSoup()
+ self.container = el
+ self.tokenizer = tokenizer
+ self.context_frames = []
- def ctx(self):
- return self.context_frames[0] if len(self.context_frames) > 0 else None
+ def ctx(self):
+ return self.context_frames[0] if len(self.context_frames) > 0 else None
- def add_class(self, class_: str):
- if not 'class' in self.container.attrs:
- self.container.attrs['class'] = [class_]
- else:
- self.container.attrs['class'].append(' ' + class_)
+ def add_class(self, class_: str):
+ if not 'class' in self.container.attrs:
+ self.container.attrs['class'] = [class_]
+ else:
+ self.container.attrs['class'].append(' ' + class_)
- def add_child_class(self, class_: str):
- if not 'class' in self.child.attrs:
- self.child.attrs['class'] = [class_]
- else:
- self.child.attrs['class'].append(' ' + class_)
+ def add_child_class(self, class_: str):
+ if not 'class' in self.child.attrs:
+ self.child.attrs['class'] = [class_]
+ else:
+ self.child.attrs['class'].append(' ' + class_)
- def apply_modifiers(self, modifiers: list[Modifier]):
- for modifier in modifiers:
- if modifier.name in modifier_whitelist:
- getattr(self, modifier.name)(*map(GroupToken.unwrap, modifier.args))
+ def apply_modifiers(self, modifiers: list[Modifier]):
+ for modifier in modifiers:
+ if modifier.name in modifier_whitelist:
+ getattr(self, modifier.name)(*map(GroupToken.unwrap, modifier.args))
- # Using this instead of throwing everything in a string and then parsing it helps
- # mitigate the risk of XSS attacks
- def image(self, name: str):
+ # Using this instead of throwing everything in a string and then parsing it helps
+ # mitigate the risk of XSS attacks
+ def image(self, name: str):
- filename = name
+ filename = name
- if not '.' in filename:
- filename += '.webp'
+ if not '.' in filename:
+ filename += '.webp'
- image = self.soup.new_tag(
- 'img',
- loading='lazy',
- src=f'{SITE_FULL_IMAGES}/i/{filename}',
- attrs={'class': f'marseyfx-image marseyfx-image-{name}'}
- )
+ image = self.soup.new_tag(
+ 'img',
+ loading='lazy',
+ src=f'{SITE_FULL_IMAGES}/i/{filename}',
+ attrs={'class': f'marseyfx-image marseyfx-image-{name}'}
+ )
- container = self.soup.new_tag(
- 'div',
- attrs={'class': f'marseyfx-image-container marseyfx-image-container-{name}'}
- )
+ container = self.soup.new_tag(
+ 'div',
+ attrs={'class': f'marseyfx-image-container marseyfx-image-container-{name}'}
+ )
- container.append(image)
- return container
-
- def underlay(self, underlay: Tag):
- self.container.insert(0, underlay)
+ container.append(image)
+ return container
+
+ def underlay(self, underlay: Tag):
+ self.container.insert(0, underlay)
- def overlay(self, overlay: Tag):
- self.container.append(overlay)
+ def overlay(self, overlay: Tag):
+ self.container.append(overlay)
- def add_style(self, style: str):
- if 'style' in self.container.attrs:
- style = self.container.attrs['style'] + style
+ def add_style(self, style: str):
+ if 'style' in self.container.attrs:
+ style = self.container.attrs['style'] + style
- self.container.attrs['style'] = style
+ self.container.attrs['style'] = style
- def meme_text(self, text: str, class_: Optional[str] = None):
- attrs = {}
- if class_ is not None:
- attrs = {'class': f'marseyfx-memetext-{class_}'}
+ def meme_text(self, text: str, class_: Optional[str] = None):
+ attrs = {}
+ if class_ is not None:
+ attrs = {'class': f'marseyfx-memetext-{class_}'}
- tag = self.soup.new_tag(
- 'span',
- attrs=attrs
- )
+ tag = self.soup.new_tag(
+ 'span',
+ attrs=attrs
+ )
- tag.string = text
+ tag.string = text
- self.overlay(tag)
+ self.overlay(tag)
- def create_other(self, other: GroupToken = None):
- wrapper = self.soup.new_tag('div', attrs={'class': f'marseyfx-modifier-{self.ctx().name}-other'})
+ def create_other(self, other: GroupToken = None):
+ wrapper = self.soup.new_tag('div', attrs={'class': f'marseyfx-modifier-{self.ctx().name}-other'})
- if other is None:
- return wrapper
-
- other = other.wrap()
- other_emoji = parser.parse_from_token(self.tokenizer, other)
+ if other is None:
+ return wrapper
+
+ other = other.wrap()
+ other_emoji = parser.parse_from_token(self.tokenizer, other)
- if other_emoji is None:
- return wrapper
-
- other_emoji.is_primary = False
+ if other_emoji is None:
+ return wrapper
+
+ other_emoji.is_primary = False
- return other_emoji.create_el(self.tokenizer).wrap(wrapper)
-
- def wrap_child(self, class_: str = ''):
- ctx = self.ctx()
- wrap_insert = ''
- if ctx.wrap_depth > 0:
- wrap_insert = f'-{ctx.wrap_depth + 1}'
-
- self.child = self.child.wrap(self.soup.new_tag('div', attrs={'class': f'marseyfx-modifier-{self.ctx().name}-wrapper{wrap_insert} {class_}'}))
+ return other_emoji.create_el(self.tokenizer)[0].wrap(wrapper)  # create_el returns (element, heavy_count); only the element can be wrapped
+
+ def wrap_child(self, class_: str = ''):
+ ctx = self.ctx()
+ wrap_insert = ''
+ if ctx.wrap_depth > 0:
+ wrap_insert = f'-{ctx.wrap_depth + 1}'
+
+ self.child = self.child.wrap(self.soup.new_tag('div', attrs={'class': f'marseyfx-modifier-{self.ctx().name}-wrapper{wrap_insert} {class_}'}))
- ctx.wrap_depth += 1
+ ctx.wrap_depth += 1
- @modifier
- def pat(self):
- self.overlay(self.image('hand'))
+ @modifier
+ def pat(self):
+ self.overlay(self.image('hand'))
- @modifier
- def love(self):
- self.overlay(self.image('love-foreground'))
- self.underlay(self.image('love-background'))
+ @modifier
+ def love(self):
+ self.overlay(self.image('love-foreground'))
+ self.underlay(self.image('love-background'))
- @modifier
- def talking(self):
- self.overlay(self.image('talking'))
+ @modifier
+ def talking(self):
+ self.overlay(self.image('talking'))
- @modifier
- def genocide(self):
- pass
+ @modifier
+ def genocide(self):
+ pass
- @modifier
- def party(self):
- pass
+ @modifier
+ def party(self):
+ pass
- @modifier
- def says(self, msg):
- if not isinstance(msg, StringLiteralToken):
- return
-
- container = self.soup.new_tag(
- 'div',
- attrs={'class': 'marseyfx-modifier-says-container'}
- )
- self.container.append(container)
+ @modifier
+ def says(self, msg):
+ if not isinstance(msg, StringLiteralToken):
+ return
+
+ container = self.soup.new_tag(
+ 'div',
+ attrs={'class': 'marseyfx-modifier-says-container'}
+ )
+ self.container.append(container)
- container.append(self.soup.new_tag(
- 'div',
- attrs={'class': 'marseyfx-modifier-says-nub'}
- ))
+ container.append(self.soup.new_tag(
+ 'div',
+ attrs={'class': 'marseyfx-modifier-says-nub'}
+ ))
- tag = self.soup.new_tag(
- 'span',
- attrs={'class': 'marseyfx-modifier-says-text'}
- )
- tag.string = msg.value
- container.append(tag)
+ tag = self.soup.new_tag(
+ 'span',
+ attrs={'class': 'marseyfx-modifier-says-text'}
+ )
+ tag.string = msg.value
+ container.append(tag)
- @modifier
- def fallover(self):
- self.container = self.container.wrap(self.soup.new_tag(
- 'div',
- attrs={'class': 'marseyfx-modifier-fallover-container'}
- ))
+ @modifier
+ def fallover(self):
+ self.container = self.container.wrap(self.soup.new_tag(
+ 'div',
+ attrs={'class': 'marseyfx-modifier-fallover-container'}
+ ))
- @modifier
- def transform(self, transformstyle: StringLiteralToken):
- if not re.fullmatch(r'[\w()\s%\.,]*', transformstyle.value):
- print(f'Evil transform detected: {transformstyle.value}')
- return
-
- self.add_style(f'transform: {transformstyle.value};')
-
- @heavy
- @modifier
- def enraged(self):
- self.underlay(self.soup.new_tag(
- 'div',
- attrs={'class': 'marseyfx-modifier-enraged-underlay'}
- ))
+ @modifier
+ def transform(self, transformstyle: StringLiteralToken):
+ if not re.fullmatch(r'[\w()\s%\.,]*', transformstyle.value):
+ print(f'Evil transform detected: {transformstyle.value}')
+ return
+
+ self.add_style(f'transform: {transformstyle.value};')
+
+ @heavy
+ @modifier
+ def enraged(self):
+ self.underlay(self.soup.new_tag(
+ 'div',
+ attrs={'class': 'marseyfx-modifier-enraged-underlay'}
+ ))
- @modifier
- def meme(self, toptext: Optional[StringLiteralToken] = None, bottomtext: Optional[StringLiteralToken] = None):
- if isinstance(toptext, StringLiteralToken):
- self.meme_text(toptext.value, 'toptext')
+ @modifier
+ def meme(self, toptext: Optional[StringLiteralToken] = None, bottomtext: Optional[StringLiteralToken] = None):
+ if isinstance(toptext, StringLiteralToken):
+ self.meme_text(toptext.value, 'toptext')
- if isinstance(bottomtext, StringLiteralToken):
- self.meme_text(bottomtext.value, 'bottomtext')
+ if isinstance(bottomtext, StringLiteralToken):
+ self.meme_text(bottomtext.value, 'bottomtext')
- def bottomtext(self, text: StringLiteralToken):
- if not isinstance(text, StringLiteralToken):
- return
+ def bottomtext(self, text: StringLiteralToken):
+ if not isinstance(text, StringLiteralToken):
+ return
- tag = self.soup.new_tag(
- 'span',
- attrs={'class': 'marseyfx-modifier-bottomtext-text'}
- )
+ tag = self.soup.new_tag(
+ 'span',
+ attrs={'class': 'marseyfx-modifier-bottomtext-text'}
+ )
- tag.string = text.value
+ tag.string = text.value
- self.overlay(tag)
+ self.overlay(tag)
- @modifier
- def spin(self, speed=None):
- if not isinstance(speed, NumberLiteralToken):
- return
+ @modifier
+ def spin(self, speed=None):
+ if not isinstance(speed, NumberLiteralToken) or speed.value <= 0:  # non-positive speed would divide by zero or give an invalid duration
+ return
- self.add_style(f'animation-duration: {1/speed.value}s;')
+ self.add_style(f'animation-duration: {1/speed.value}s;')
- @modifier
- def triumphs(self, other: GroupToken):
- other = other.wrap()
- other_emoji = parser.parse_from_token(self.tokenizer, other)
- print(f'Other emoji: {other_emoji} / Token: {other}')
+ @modifier
+ def triumphs(self, other: GroupToken):
+ other = other.wrap()
+ other_emoji = parser.parse_from_token(self.tokenizer, other)
+ print(f'Other emoji: {other_emoji} / Token: {other}')
- if other_emoji is None:
- return
-
- self.add_child_class('marseyfx-modifier-triumphs-self')
+ if other_emoji is None:
+ return
+
+ self.add_child_class('marseyfx-modifier-triumphs-self')
- other_emoji.is_primary = False
+ other_emoji.is_primary = False
- other = other_emoji.create_el(self.tokenizer).wrap(
- self.soup.new_tag('div', attrs={'class': 'marseyfx-modifier-triumphs-other'})
- )
- self.underlay(other)
+ other = other_emoji.create_el(self.tokenizer)[0].wrap(  # create_el returns (element, heavy_count)
+ self.soup.new_tag('div', attrs={'class': 'marseyfx-modifier-triumphs-other'})
+ )
+ self.underlay(other)
- @modifier
- def nested(self, inside: GroupToken):
- inside = inside.wrap()
- inside_emoji = parser.parse_from_token(self.tokenizer, inside)
+ @modifier
+ def nested(self, inside: GroupToken):
+ inside = inside.wrap()
+ inside_emoji = parser.parse_from_token(self.tokenizer, inside)
- if inside_emoji is None:
- return
-
- inside_emoji.is_primary = False
+ if inside_emoji is None:
+ return
+
+ inside_emoji.is_primary = False
- inside = inside_emoji.create_el(self.tokenizer).wrap(
- self.soup.new_tag('div', attrs={'class': 'marseyfx-modifier-nested-other'})
- )
+ inside = inside_emoji.create_el(self.tokenizer)[0].wrap(  # create_el returns (element, heavy_count)
+ self.soup.new_tag('div', attrs={'class': 'marseyfx-modifier-nested-other'})
+ )
- self.underlay(inside)
+ self.underlay(inside)
- self.add_child_class('marseyfx-modifier-nested-side')
- child = self.child
- self.child = child.wrap(self.soup.new_tag('div', attrs={'class': 'marseyfx-modifier-nested-outer-container'}))
- other_side = copy.copy(child)
- self.child.append(other_side)
+ self.add_child_class('marseyfx-modifier-nested-side')
+ child = self.child
+ self.child = child.wrap(self.soup.new_tag('div', attrs={'class': 'marseyfx-modifier-nested-outer-container'}))
+ other_side = copy.copy(child)
+ self.child.append(other_side)
- @modifier
- def morph(self, other: GroupToken):
- self.add_child_class('marseyfx-modifier-morph-self')
+ @modifier
+ def morph(self, other: GroupToken):
+ self.add_child_class('marseyfx-modifier-morph-self')
- other = other.wrap()
- other_emoji = parser.parse_from_token(self.tokenizer, other)
+ other = other.wrap()
+ other_emoji = parser.parse_from_token(self.tokenizer, other)
- if other_emoji is None:
- return
-
- other_emoji.is_primary = False
- other = other_emoji.create_el(self.tokenizer).wrap(
- self.soup.new_tag('div', attrs={'class': 'marseyfx-modifier-morph-other'})
- )
+ if other_emoji is None:
+ return
+
+ other_emoji.is_primary = False
+ other = other_emoji.create_el(self.tokenizer)[0].wrap(  # create_el returns (element, heavy_count)
+ self.soup.new_tag('div', attrs={'class': 'marseyfx-modifier-morph-other'})
+ )
- self.container.append(other)
+ self.container.append(other)
- @heavy
- @modifier
- def bulge(self, strength: NumberLiteralToken = None):
- self.child = self.child.wrap(self.soup.new_tag('svg', attrs={'class': 'marseyfx-modifier-bulge-container'}))
+ @heavy
+ @modifier
+ def bulge(self, strength: NumberLiteralToken = None):
+ self.child = self.child.wrap(self.soup.new_tag('svg', attrs={'class': 'marseyfx-modifier-bulge-container'}))
- @modifier
- def prohibition(self):
- self.overlay(self.image('prohibition.svg'))
+ @modifier
+ def prohibition(self):
+ self.overlay(self.image('prohibition.svg'))
- @modifier
- def scope(self):
- self.overlay(self.image('scope.svg'))
- self.add_child_class('marseyfx-modifier-scope-target')
+ @modifier
+ def scope(self):
+ self.overlay(self.image('scope.svg'))
+ self.add_child_class('marseyfx-modifier-scope-target')
- @modifier
- def fucks(self, other: GroupToken):
- other = self.create_other(other)
- self.container.append(other)
+ @modifier
+ def fucks(self, other: GroupToken):
+ other = self.create_other(other)
+ self.container.append(other)
- @heavy
- @modifier
- def glow(self):
- pass
+ @heavy
+ @modifier
+ def glow(self):
+ pass
- @heavy
- @modifier
- def echo(self):
- for i in range(1, 4):
- tag = copy.copy(self.child)
- tag.attrs['class'] = tag.attrs['class'].copy()
- tag.attrs['class'].append(f'marseyfx-modifier-echo-clone marseyfx-modifier-echo-clone-{i}')
- self.container.append(tag)
+ @heavy
+ @modifier
+ def echo(self):
+ for i in range(1, 4):
+ tag = copy.copy(self.child)
+ tag.attrs['class'] = tag.attrs['class'].copy()
+ tag.attrs['class'].append(f'marseyfx-modifier-echo-clone marseyfx-modifier-echo-clone-{i}')
+ self.container.append(tag)
- @modifier
- def rentfree(self):
- self.wrap_child()
- self.overlay(self.image('rentfree.png'))
\ No newline at end of file
+ @modifier
+ def rentfree(self):
+ self.wrap_child()
+ self.overlay(self.image('rentfree.png'))
\ No newline at end of file
diff --git a/files/helpers/marseyfx/parser.py b/files/helpers/marseyfx/parser.py
index 7f7b7710b..aec6f0427 100644
--- a/files/helpers/marseyfx/parser.py
+++ b/files/helpers/marseyfx/parser.py
@@ -1,145 +1,162 @@
+import random
from tokenize import Token
from bs4 import BeautifulSoup
-from files.helpers.config.const import SITE_FULL_IMAGES
+from files.helpers.config.const import EMOJI_KINDS, SITE_FULL_IMAGES
from files.helpers.get import get_user
from files.helpers.marseyfx.tokenizer import ArgsToken, DotToken, GroupToken, NumberLiteralToken, Tokenizer, WordToken
from files.helpers.marseyfx.modifiers import Modified, Modifier, modifier_whitelist
+from sqlalchemy.sql import func
+
emoji_replacers = {
- '!': 'is_flipped',
- '#': 'is_big',
- '@': 'is_user'
+ '!': 'is_flipped',
+ '#': 'is_big',
+ '@': 'is_user'
}
class Emoji:
- name: str
- token: Token
- is_big = False
- is_flipped = False
- is_user = False
- modifiers: list[Modifier]
- is_primary = True
+ name: str
+ token: Token
+ is_big = False
+ is_flipped = False
+ is_user = False
+ modifiers: list[Modifier]
+ is_primary = True
+ is_golden = False
- def __init__(self, name: str, modifiers, token: Token, **args):
- for symbol, value in emoji_replacers.items():
- if symbol in name:
- name = name.replace(symbol, '')
- setattr(self, value, True)
+ def __init__(self, name: str, modifiers, token: Token, **args):
+ for symbol, value in emoji_replacers.items():
+ if symbol in name:
+ name = name.replace(symbol, '')
+ setattr(self, value, True)
- self.name = name
- self.modifiers = modifiers
- self.token = token
- self.is_primary = args.get('is_primary', True)
+ if name.endswith('random'):
+ kind = name.split('random')[0].title()
+ if kind == 'Donkeykong': kind = 'Donkey Kong'
+ elif kind == 'Marseyflag': kind = 'Marsey Flags'
+ elif kind == 'Marseyalphabet': kind = 'Marsey Alphabet'
- def create_el(self, tokenizer: Tokenizer):
- soup = BeautifulSoup()
- el = None
- if (self.is_user):
- user = get_user(self.name, graceful=True)
- src = None
- if user:
- src = f'/pp/{user.id}'
-
- el = soup.new_tag(
- 'img',
- loading='lazy',
- src=src,
- attrs={
- 'class': f'marseyfx-emoji marseyfx-image marseyfx-user',
- }
- )
- else:
- el = soup.new_tag(
- 'img',
- loading='lazy',
- src=f'{SITE_FULL_IMAGES}/e/{self.name}.webp',
- attrs={
- 'class': f'marseyfx-emoji marseyfx-image',
- }
- )
+ if kind in EMOJI_KINDS:
+ name = g.db.query(Emoji.name).filter_by(kind=kind).order_by(func.random()).first()[0]  # NOTE(review): `g` is not imported in this module and `Emoji` here resolves to this parser class, not a DB model -- confirm the intended imports
- soup.append(el)
- el = el.wrap(
- soup.new_tag('div', attrs={
- 'class': 'marseyfx-emoji-container'
- })
- )
+ self.name = name
+ self.modifiers = modifiers
+ self.token = token
+ self.is_primary = args.get('is_primary', True)
+ if random.random() < 0.004:
+ self.is_golden = True
- mod = Modified(el, tokenizer)
- mod.apply_modifiers(self.modifiers)
+ def create_el(self, tokenizer: Tokenizer):
+ soup = BeautifulSoup()
+ el = None
+ if (self.is_user):
+ user = get_user(self.name, graceful=True)
+ src = None
+ if user:
+ src = f'/pp/{user.id}'
+
+ el = soup.new_tag(
+ 'img',
+ loading='lazy',
+ src=src,
+ attrs={
+ 'class': f'marseyfx-emoji marseyfx-image marseyfx-user',
+ }
+ )
+ else:
+ el = soup.new_tag(
+ 'img',
+ loading='lazy',
+ src=f'{SITE_FULL_IMAGES}/e/{self.name}.webp',
+ attrs={
+ 'class': f'marseyfx-emoji marseyfx-image',
+ }
+ )
+
+ if self.is_golden:
+ el['class'].append(' golden')
+
+ soup.append(el)
+ el = el.wrap(
+ soup.new_tag('div', attrs={
+ 'class': 'marseyfx-emoji-container'
+ })
+ )
+
+ mod = Modified(el, tokenizer)
+ mod.apply_modifiers(self.modifiers)
- container_attrs = {
- 'class': 'marseyfx-container',
- }
+ container_attrs = {
+ 'class': 'marseyfx-container',
+ }
- if self.is_primary:
- container_attrs |= {
- 'data-bs-toggle': 'tooltip',
- 'title': tokenizer.str
- }
+ if self.is_primary:
+ container_attrs |= {
+ 'data-bs-toggle': 'tooltip',
+ 'title': tokenizer.str
+ }
- container = soup.new_tag('div', attrs=container_attrs)
+ container = soup.new_tag('div', attrs=container_attrs)
- if (self.is_big):
- container['class'].append(' marseyfx-big')
+ if (self.is_big):
+ container['class'].append(' marseyfx-big')
- if (self.is_flipped):
- container['class'].append(' marseyfx-flipped')
+ if (self.is_flipped):
+ container['class'].append(' marseyfx-flipped')
- return mod.container.wrap(container)
+ return mod.container.wrap(container), mod.heavy_count
def parse_emoji(tokenizer: Tokenizer):
- token = tokenizer.parse_next_tokens()
+ token = tokenizer.parse_next_tokens()
- if len(tokenizer.errors) > 0 or token is None:
- return False, None, token
+ if len(tokenizer.errors) > 0 or token is None:
+ return False, None, token
- emoji = parse_from_token(tokenizer, token)
- print(f'Here! {emoji}')
+ emoji = parse_from_token(tokenizer, token)
- if not emoji:
- return False, None, token
+ if not emoji:
+ return False, None, token
- return True, emoji, token
+ return True, emoji, token
def parse_from_token(tokenizer: Tokenizer, token: GroupToken):
- if not isinstance(token, GroupToken):
- tokenizer.error('Malformed token -- Expected a group token')
- return
+ if not isinstance(token, GroupToken):
+ tokenizer.error('Malformed token -- Expected a group token')
+ return
- emoji = token.children[0]
+ emoji = token.children[0]
- if not isinstance(emoji, WordToken) and not isinstance(emoji, NumberLiteralToken):
- tokenizer.error('Malformed token -- Expected an emoji (word token) or number literal token')
- return
-
- modifiers = []
+ if not isinstance(emoji, WordToken) and not isinstance(emoji, NumberLiteralToken):
+ tokenizer.error('Malformed token -- Expected an emoji (word token) or number literal token')
+ return
+
+ modifiers = []
- i = 1
- while i + 1 < len(token.children):
- t = token.children[i]
+ i = 1
+ while i + 1 < len(token.children):
+ t = token.children[i]
- if not isinstance(t, DotToken):
- tokenizer.error('Malformed token -- Expected a dot')
- return
+ if not isinstance(t, DotToken):
+ tokenizer.error('Malformed token -- Expected a dot')
+ return
- modifier = token.children[i + 1]
- if not isinstance(modifier, WordToken):
- tokenizer.error('Malformed token -- Expected a modifier name (word token)')
- return
-
- if not modifier.value in modifier_whitelist:
- tokenizer.error(f'Unknown modifier: {modifier.value}')
- return
+ modifier = token.children[i + 1]
+ if not isinstance(modifier, WordToken):
+ tokenizer.error('Malformed token -- Expected a modifier name (word token)')
+ return
+
+ if not modifier.value in modifier_whitelist:
+ tokenizer.error(f'Unknown modifier: {modifier.value}')
+ return
- if not i + 2 < len(token.children) or not isinstance(token.children[i + 2], ArgsToken):
- modifiers.append(Modifier(modifier.value, []))
- i += 2
- else:
- args = token.children[i + 2]
- modifiers.append(Modifier(modifier.value, args.children))
- i += 3
+ if not i + 2 < len(token.children) or not isinstance(token.children[i + 2], ArgsToken):
+ modifiers.append(Modifier(modifier.value, []))
+ i += 2
+ else:
+ args = token.children[i + 2]
+ modifiers.append(Modifier(modifier.value, args.children))
+ i += 3
- return Emoji(tokenizer.str[emoji.span[0]:emoji.span[1]], modifiers, token)
\ No newline at end of file
+ return Emoji(tokenizer.str[emoji.span[0]:emoji.span[1]], modifiers, token)
\ No newline at end of file
diff --git a/files/helpers/marseyfx/tokenizer.py b/files/helpers/marseyfx/tokenizer.py
index 7c4ce7dcc..16c9a739d 100644
--- a/files/helpers/marseyfx/tokenizer.py
+++ b/files/helpers/marseyfx/tokenizer.py
@@ -2,235 +2,235 @@ from abc import abstractmethod
import re
class TokenizerError:
- index: int
- error: str
+ index: int
+ error: str
- def __init__(self, tokenizer, index: int, error: str):
- self.tokenizer = tokenizer
- self.index = index
- self.error = error
+ def __init__(self, tokenizer, index: int, error: str):
+ self.tokenizer = tokenizer
+ self.index = index
+ self.error = error
- def __str__(self):
- return f'{self.error}\n {self.tokenizer.str}\n {" " * self.index}^'
+ def __str__(self):
+ return f'{self.error}\n {self.tokenizer.str}\n {" " * self.index}^'
class Tokenizer:
- str: str
- index: int
- errors: list[TokenizerError]
+ str: str
+ index: int
+ errors: list[TokenizerError]
- def __init__(self, str: str):
- self.str = str
- self.index = 0
- self.errors = []
+ def __init__(self, str: str):
+ self.str = str
+ self.index = 0
+ self.errors = []
- def has_next(self):
- return self.index < len(self.str)
+ def has_next(self):
+ return self.index < len(self.str)
- def peek(self):
- if not self.has_next():
- self.error('Unexpected end of input')
- return None
- return self.str[self.index]
-
- def eat(self):
- c = self.peek()
- self.index += 1
- return c
-
- def barf(self):
- self.index -= 1
-
- def error(self, error: str):
- self.errors.append(TokenizerError(self, self.index, error))
+ def peek(self):
+ if not self.has_next():
+ self.error('Unexpected end of input')
+ return None
+ return self.str[self.index]
+
+ def eat(self):
+ c = self.peek()
+ self.index += 1
+ return c
+
+ def barf(self):
+ self.index -= 1
+
+ def error(self, error: str):
+ self.errors.append(TokenizerError(self, self.index, error))
- def token_to_string(self, token):
- return self.str[token.span[0]:token.span[1]]
+ def token_to_string(self, token):
+ return self.str[token.span[0]:token.span[1]]
- def parse_next_tokens(self):
- start = self.index
- tokens = []
- while self.has_next():
- if self.peek() == ' ':
- self.eat()
- elif NumberLiteralToken.can_parse(self):
- tokens.append(NumberLiteralToken.parse(self))
- elif WordToken.can_parse(self):
- tokens.append(WordToken.parse(self))
- elif DotToken.can_parse(self):
- tokens.append(DotToken.parse(self))
- elif ArgsToken.can_parse(self):
- tokens.append(ArgsToken.parse(self))
- elif StringLiteralToken.can_parse(self):
- tokens.append(StringLiteralToken.parse(self))
- else:
- break
+ def parse_next_tokens(self):
+ start = self.index
+ tokens = []
+ while self.has_next():
+ if self.peek() == ' ':
+ self.eat()
+ elif NumberLiteralToken.can_parse(self):
+ tokens.append(NumberLiteralToken.parse(self))
+ elif WordToken.can_parse(self):
+ tokens.append(WordToken.parse(self))
+ elif DotToken.can_parse(self):
+ tokens.append(DotToken.parse(self))
+ elif ArgsToken.can_parse(self):
+ tokens.append(ArgsToken.parse(self))
+ elif StringLiteralToken.can_parse(self):
+ tokens.append(StringLiteralToken.parse(self))
+ else:
+ break
- if len(tokens) == 0:
- self.error('Expected a token')
- return None
+ if len(tokens) == 0:
+ self.error('Expected a token')
+ return None
- return GroupToken((start, self.index), tokens)
+ return GroupToken((start, self.index), tokens)
class Token:
- span: tuple[int, int]
+ span: tuple[int, int]
- def wrap(self):
- if isinstance(self, GroupToken):
- return self
- else:
- return GroupToken(self.span, [self])
+ def wrap(self):
+ if isinstance(self, GroupToken):
+ return self
+ else:
+ return GroupToken(self.span, [self])
- @staticmethod
- @abstractmethod
- def can_parse(tokenizer: Tokenizer) -> bool:
- pass
+ @staticmethod
+ @abstractmethod
+ def can_parse(tokenizer: Tokenizer) -> bool:
+ pass
- @staticmethod
- @abstractmethod
- def parse(tokenizer: Tokenizer):
- pass
+ @staticmethod
+ @abstractmethod
+ def parse(tokenizer: Tokenizer):
+ pass
class WordToken(Token):
- value: str
+ value: str
- def __init__(self, span: tuple[int, int], value: str):
- self.value = value
- self.span = span
+ def __init__(self, span: tuple[int, int], value: str):
+ self.value = value
+ self.span = span
- @staticmethod
- def can_parse(tokenizer: Tokenizer):
- return re.fullmatch(r'[!#@a-zA-Z]', tokenizer.peek())
+ @staticmethod
+ def can_parse(tokenizer: Tokenizer):
+ return re.fullmatch(r'[!#@a-zA-Z]', tokenizer.peek())
- @staticmethod
- def parse(tokenizer: Tokenizer):
- start = tokenizer.index
- value = ''
- while tokenizer.has_next():
- if re.fullmatch(r'[!#@a-zA-Z\d]', tokenizer.peek()):
- value += tokenizer.eat()
- else:
- break
+ @staticmethod
+ def parse(tokenizer: Tokenizer):
+ start = tokenizer.index
+ value = ''
+ while tokenizer.has_next():
+ if re.fullmatch(r'[!#@a-zA-Z\d]', tokenizer.peek()):
+ value += tokenizer.eat()
+ else:
+ break
- return WordToken((start, tokenizer.index), value)
+ return WordToken((start, tokenizer.index), value)
class StringLiteralToken(Token):
- value: str
+ value: str
- def __init__(self, span: tuple[int, int], value: str):
- self.value = value
- self.span = span
+ def __init__(self, span: tuple[int, int], value: str):
+ self.value = value
+ self.span = span
- @staticmethod
- def can_parse(tokenizer: Tokenizer):
- return tokenizer.peek() == '"'
-
- # i was cuddling with my fwb while writing this ;3
- @staticmethod
- def parse(tokenizer: Tokenizer):
- start = tokenizer.index
- tokenizer.eat()
- value = ''
- next_escaped = False
- while tokenizer.has_next():
- if tokenizer.peek() == '"' and not next_escaped:
- tokenizer.eat()
- break
- elif tokenizer.peek() == '\\' and not next_escaped:
- next_escaped = True
- tokenizer.eat()
- else:
- value += tokenizer.eat()
- next_escaped = False
+ @staticmethod
+ def can_parse(tokenizer: Tokenizer):
+ return tokenizer.peek() == '"'
+
+ # i was cuddling with my fwb while writing this ;3
+ @staticmethod
+ def parse(tokenizer: Tokenizer):
+ start = tokenizer.index
+ tokenizer.eat()
+ value = ''
+ next_escaped = False
+ while tokenizer.has_next():
+ if tokenizer.peek() == '"' and not next_escaped:
+ tokenizer.eat()
+ break
+ elif tokenizer.peek() == '\\' and not next_escaped:
+ next_escaped = True
+ tokenizer.eat()
+ else:
+ value += tokenizer.eat()
+ next_escaped = False
- return StringLiteralToken((start, tokenizer.index), value)
-
+ return StringLiteralToken((start, tokenizer.index), value)
+
class NumberLiteralToken(Token):
- value: float
+ value: float
- def __init__(self, span: tuple[int, int], value: float):
- self.value = value
- self.span = span
+ def __init__(self, span: tuple[int, int], value: float):
+ self.value = value
+ self.span = span
- @staticmethod
- def can_parse(tokenizer: Tokenizer):
- return re.fullmatch(r'[-\d]', tokenizer.peek())
-
- @staticmethod
- def can_parse_next(tokenizer: Tokenizer):
- return re.fullmatch(r'[-\d\.]', tokenizer.peek())
+ @staticmethod
+ def can_parse(tokenizer: Tokenizer):
+ return re.fullmatch(r'[-\d]', tokenizer.peek())
+
+ @staticmethod
+ def can_parse_next(tokenizer: Tokenizer):
+ return re.fullmatch(r'[-\d\.]', tokenizer.peek())
- @staticmethod
- def parse(tokenizer: Tokenizer):
- start = tokenizer.index
- value = ''
- while tokenizer.has_next():
- if NumberLiteralToken.can_parse_next(tokenizer):
- value += tokenizer.eat()
- else:
- break
+ @staticmethod
+ def parse(tokenizer: Tokenizer):
+ start = tokenizer.index
+ value = ''
+ while tokenizer.has_next():
+ if NumberLiteralToken.can_parse_next(tokenizer):
+ value += tokenizer.eat()
+ else:
+ break
- try:
- value = float(value)
- except ValueError:
- tokenizer.error('Invalid number literal')
- value = 0.0
+ try:
+ value = float(value)
+ except ValueError:
+ tokenizer.error('Invalid number literal')
+ value = 0.0
- return NumberLiteralToken((start, tokenizer.index), value)
-
- def get_float(self):
- return float(self.value)
+ return NumberLiteralToken((start, tokenizer.index), value)
+
+ def get_float(self):
+ return float(self.value)
class DotToken(Token):
- def __init__(self, span: tuple[int, int]):
- self.span = span
+ def __init__(self, span: tuple[int, int]):
+ self.span = span
- @staticmethod
- def can_parse(tokenizer: Tokenizer):
- return tokenizer.peek() == '.'
+ @staticmethod
+ def can_parse(tokenizer: Tokenizer):
+ return tokenizer.peek() == '.'
- @staticmethod
- def parse(tokenizer: Tokenizer):
- tokenizer.eat()
- return DotToken((tokenizer.index, tokenizer.index + 1))
+ @staticmethod
+ def parse(tokenizer: Tokenizer):
+ tokenizer.eat()
+ return DotToken((tokenizer.index, tokenizer.index + 1))
class GroupToken(Token):
- children: list[Token]
+ children: list[Token]
- def __init__(self, span: tuple[int, int], children: list[Token]):
- self.children = children
+ def __init__(self, span: tuple[int, int], children: list[Token]):
+ self.children = children
- # this span is probably wrong tbh but idc
- self.span = span
+ # this span is probably wrong tbh but idc
+ self.span = span
- def unwrap(self):
- if len(self.children) == 1:
- return self.children[0]
- else:
- return self
+ def unwrap(self):
+ if len(self.children) == 1:
+ return self.children[0]
+ else:
+ return self
class ArgsToken(Token):
- children: list[GroupToken]
- def __init__(self, span: tuple[int, int], children: list[Token]):
- self.children = children
- self.span = span
+ children: list[GroupToken]
+ def __init__(self, span: tuple[int, int], children: list[Token]):
+ self.children = children
+ self.span = span
- @staticmethod
- def can_parse(tokenizer: Tokenizer):
- return tokenizer.peek() == '('
+ @staticmethod
+ def can_parse(tokenizer: Tokenizer):
+ return tokenizer.peek() == '('
- @staticmethod
- def parse(tokenizer: Tokenizer):
- start = tokenizer.index
- tokens = []
- while tokenizer.has_next():
- if tokenizer.peek() == '(':
- tokenizer.eat()
- if tokenizer.peek() == ')':
- tokenizer.eat()
- break
- elif tokenizer.peek() == ',':
- tokenizer.eat()
- else:
- tokens.append(tokenizer.parse_next_tokens())
+ @staticmethod
+ def parse(tokenizer: Tokenizer):
+ start = tokenizer.index
+ tokens = []
+ while tokenizer.has_next():
+ if tokenizer.peek() == '(':
+ tokenizer.eat()
+ if tokenizer.peek() == ')':
+ tokenizer.eat()
+ break
+ elif tokenizer.peek() == ',':
+ tokenizer.eat()
+ else:
+ tokens.append(tokenizer.parse_next_tokens())
- return ArgsToken((start, tokenizer.index), tokens)
\ No newline at end of file
+ return ArgsToken((start, tokenizer.index), tokens)
\ No newline at end of file
diff --git a/files/helpers/sanitize.py b/files/helpers/sanitize.py
index 4190988e9..077aa44b6 100644
--- a/files/helpers/sanitize.py
+++ b/files/helpers/sanitize.py
@@ -1,9 +1,11 @@
+import copy
import functools
import random
import re
import signal
from functools import partial
from os import path, listdir
+from typing import Union
from typing_extensions import deprecated
from urllib.parse import parse_qs, urlparse, unquote, ParseResult, urlencode, urlunparse
import time
@@ -29,6 +31,8 @@ from files.helpers.const_stateful import *
from files.helpers.regex import *
from files.helpers.get import *
+from bs4 import Tag
+
TLDS = ( # Original gTLDs and ccTLDs
'ac','ad','ae','aero','af','ag','ai','al','am','an','ao','aq','ar','arpa','as','asia','at',
'au','aw','ax','az','ba','bb','bd','be','bf','bg','bh','bi','biz','bj','bm','bn','bo','br',
@@ -272,7 +276,7 @@ def find_all_emote_endings(word):
class RenderEmojisResult:
emojis_used: set[str]
heavy_count = 0
- tags: list[str]
+ tags: list[Union[str, Tag]]
def __init__(self):
self.emojis_used = set()
@@ -283,11 +287,33 @@ class RenderEmojisResult:
self.heavy_count += other.heavy_count
self.tags.extend(other.tags)
-def render_emojis(markup: str, **kwargs):
+ def db_update_count(self):
+ for emoji in g.db.query(Emoji).filter(Emoji.submitter_id==None, Emoji.name.in_(self.emojis_used)):
+ emoji.count += 1
+ g.db.add(emoji)
+
+def render_emojis_tag(tag: Tag, **kwargs):
+	# Render emojis in each text node of a copy of `tag`; returns a RenderEmojisResult.
+	result = RenderEmojisResult()
+	tag = copy.copy(tag)
+	# Text nodes with no parent or directly inside <code>/<pre> are left untouched.
+	for text_el in tag.find_all(text=True):
+		if not text_el.parent or text_el.parent.name in {'code', 'pre'}:
+			continue
+		res = render_emojis(text_el.text, **kwargs)
+		text_el.replace_with(*res.tags)
+		result.update(res)
+	# update() accumulated per-node tags; expose only the rewritten copy instead.
+	result.tags = [tag]
+	return result
+
+def render_emojis(markup: Union[str, Tag], **kwargs):
+ if isinstance(markup, Tag):
+ return render_emojis_tag(markup, **kwargs)
result = RenderEmojisResult()
last_match_end = 0
- golden = kwargs.get('golden', True)
+ permit_golden = kwargs.get('permit_golden', True)
permit_big = kwargs.get('permit_big', True)
for emoji_match in marseyfx_emoji_regex.finditer(markup):
@@ -306,8 +332,12 @@ def render_emojis(markup: str, **kwargs):
if not permit_big:
emoji.is_big = False
- emoji_html = emoji.create_el(tokenizer)
+ if not permit_golden:
+ emoji.is_golden = False
+
+ emoji_html, heavy_count = emoji.create_el(tokenizer)
result.tags.append(emoji_html)
+ result.heavy_count += heavy_count
if len(tokenizer.errors) > 0:
soup = BeautifulSoup()
@@ -595,19 +625,14 @@ def sanitize(sanitized, golden=True, limit_pings=0, showmore=False, count_emojis
soup = BeautifulSoup(sanitized, 'lxml')
# -- EMOJI RENDERING --
- emojis_used = set()
+ emoji_render = render_emojis(soup)
+ soup = emoji_render.tags[0]
- for text_el in soup.find_all(text=True):
- if not text_el.parent or text_el.parent.name in {'code', 'pre'}:
- continue
- res = render_emojis(text_el.text)
- text_el.replace_with(*res.tags)
- emojis_used.update(res.emojis_used)
+ if emoji_render.heavy_count > 5:
+ error("Too many heavy emojis! (Max 5)")
if count_emojis:
- for emoji in g.db.query(Emoji).filter(Emoji.submitter_id==None, Emoji.name.in_(emojis_used)):
- emoji.count += 1
- g.db.add(emoji)
+ emoji_render.db_update_count()
# -- @ MENTIONS --
ping_count = 0
@@ -770,32 +795,32 @@ def allowed_attributes_emojis(tag, name, value):
if name == 'cide' and not value: return True
return False
-
@with_sigalrm_timeout(2)
def filter_emojis_only(title, golden=True, count_emojis=False):
-
+ # XSS warning: do not allow any html tags, otherwise someone could do something like this:
+ # `<img src=":marsey:">` because when :marsey: is rendered, it will include quotes that
+ # will end the attribute and allow someone to inject an evil attribute like onerror
title = title.replace("\n", "").replace("\r", "").replace("\t", "").replace('<','&lt;').replace('>','&gt;')
-
title = remove_cuniform(title)
- res = render_emojis(title) #old_render_emoji(title, emoji_regex2, golden, emojis_used, is_title=True)
+ title = strikethrough_regex.sub(r'\1\2', title)
+ title = bleach.clean(title, tags=['img','del','span'], attributes=allowed_attributes_emojis, protocols=['http','https']).replace('\n','')
+
+ res = render_emojis(title, permit_big=False) #old_render_emoji(title, emoji_regex2, golden, emojis_used, is_title=True)
+
+ if res.heavy_count > 0:
+ abort(400, "You can't have heavy/filter emojis in the title!")
title = ''.join(map(str, res.tags))
if count_emojis:
- for emoji in g.db.query(Emoji).filter(Emoji.submitter_id==None, Emoji.name.in_(emojis_used)):
- emoji.count += 1
- g.db.add(emoji)
+ res.db_update_count()
- title = strikethrough_regex.sub(r'\1\2', title)
-
- title = bleach.clean(title, tags=['img','del','span'], attributes=allowed_attributes_emojis, protocols=['http','https']).replace('\n','')
+ title = title.strip()
if len(title) > POST_TITLE_HTML_LENGTH_LIMIT:
abort(400, "Rendered title is too big!")
- title = title.strip()
-
return title
def is_whitelisted(domain, k):