Finish up the checklist

pull/206/head
transbitch 2023-10-08 20:49:40 -04:00
parent e86e2a4c99
commit 182ee41d6a
4 changed files with 623 additions and 581 deletions

View File

@ -9,337 +9,337 @@ import files.helpers.marseyfx.parser as parser
# Names of modifier methods that may be invoked from user input.
modifier_whitelist = []

class Modifier:
    """A single parsed modifier invocation: its name plus raw argument tokens."""
    name: str
    args: list[Token]

    def __init__(self, name: str, args: list[Token]):
        self.name = name
        self.args = args
def modifier(fn):
    """Register *fn* as a whitelisted emoji modifier and wrap its invocation.

    Each call pushes a ModifierContextFrame, wraps the current container in a
    `marseyfx-modifier-<name>` div, tags the previous container as the
    modifier's "self" child, runs fn, then pops the frame.

    Fixes over the original: `functools.wraps` preserves the modifier's
    `__name__`/`__doc__`, and the context frame is popped in a `finally` so a
    raising modifier no longer leaks its frame.
    """
    from functools import wraps

    modifier_whitelist.append(fn.__name__)

    @wraps(fn)
    def wrapper(*args, **kwargs):
        slf = args[0]
        ctx = ModifierContextFrame(fn.__name__)
        slf.context_frames.insert(0, ctx)
        slf.child = slf.container
        slf.container = slf.child.wrap(slf.soup.new_tag('div', attrs={'class': f'marseyfx-modifier marseyfx-modifier-{ctx.name}'}))
        slf.add_child_class(f'marseyfx-modifier-{ctx.name}-self')
        try:
            return fn(*args, **kwargs)
        finally:
            slf.context_frames.pop(0)
    return wrapper
def heavy(fn):
    """Mark a modifier as computationally heavy.

    Increments the instance's ``heavy_count`` each time the wrapped modifier
    runs, so callers can rate-limit expensive effects.  Uses
    `functools.wraps` so the wrapped function keeps its `__name__` (the
    `@modifier` decorator below it relies on accurate names).
    """
    from functools import wraps

    @wraps(fn)
    def wrapper(*args, **kwargs):
        slf = args[0]
        slf.heavy_count += 1
        return fn(*args, **kwargs)
    return wrapper
class ModifierContextFrame:
    """Per-modifier state pushed while a @modifier wrapper is executing."""
    name: str            # modifier name, e.g. 'pat'
    wrap_depth: int = 0  # number of wrap_child() calls made in this frame

    def __init__(self, name: str):
        self.name = name
class Modified:
    """An emoji element being progressively wrapped by marseyfx modifiers.

    `container` is the outermost tag built so far; `child` is the tag the
    active modifier should treat as its target.  The @modifier decorator
    wraps `container` in a marker div and pushes a ModifierContextFrame
    before each modifier method body runs.
    """
    soup: BeautifulSoup
    container: Tag
    child: Tag
    tokenizer: Tokenizer
    heavy_count = 0  # bumped by @heavy-decorated modifiers
    context_frames: list[ModifierContextFrame]

    def __init__(self, el, tokenizer):
        self.soup = BeautifulSoup()
        self.container = el
        self.tokenizer = tokenizer
        self.context_frames = []

    def ctx(self):
        """Return the innermost modifier context frame, or None outside one."""
        return self.context_frames[0] if self.context_frames else None

    def add_class(self, class_: str):
        """Append a CSS class to the container tag."""
        if 'class' not in self.container.attrs:
            self.container.attrs['class'] = [class_]
        else:
            # NOTE(review): the leading space is preserved for byte-identical
            # rendered output, although bs4 already joins list entries with
            # spaces.
            self.container.attrs['class'].append(' ' + class_)

    def add_child_class(self, class_: str):
        """Append a CSS class to the child (target) tag."""
        if 'class' not in self.child.attrs:
            self.child.attrs['class'] = [class_]
        else:
            self.child.attrs['class'].append(' ' + class_)

    def apply_modifiers(self, modifiers: list[Modifier]):
        """Run each whitelisted modifier against this element, in order."""
        for mod in modifiers:
            if mod.name in modifier_whitelist:
                getattr(self, mod.name)(*map(GroupToken.unwrap, mod.args))

    # Using this instead of throwing everything in a string and then parsing
    # it helps mitigate the risk of XSS attacks.
    def image(self, name: str):
        """Return a div-wrapped <img> for a bundled marseyfx asset.

        A name without an extension defaults to `.webp`.
        """
        filename = name
        if '.' not in filename:
            filename += '.webp'

        image = self.soup.new_tag(
            'img',
            loading='lazy',
            # Fix: `filename` was computed but never used (the src had been
            # mangled) -- serve the named asset.
            src=f'{SITE_FULL_IMAGES}/i/{filename}',
            attrs={'class': f'marseyfx-image marseyfx-image-{name}'}
        )

        container = self.soup.new_tag(
            'div',
            attrs={'class': f'marseyfx-image-container marseyfx-image-container-{name}'}
        )
        container.append(image)
        return container

    def underlay(self, underlay: Tag):
        """Insert a tag beneath everything else in the container."""
        self.container.insert(0, underlay)

    def overlay(self, overlay: Tag):
        """Append a tag on top of everything else in the container."""
        self.container.append(overlay)

    def add_style(self, style: str):
        """Append inline CSS to the container's style attribute."""
        if 'style' in self.container.attrs:
            style = self.container.attrs['style'] + style
        self.container.attrs['style'] = style

    def meme_text(self, text: str, class_: Optional[str] = None):
        """Overlay meme text, optionally tagged marseyfx-memetext-<class_>."""
        attrs = {}
        if class_ is not None:
            attrs = {'class': f'marseyfx-memetext-{class_}'}
        tag = self.soup.new_tag('span', attrs=attrs)
        tag.string = text
        self.overlay(tag)

    def create_other(self, other: GroupToken = None):
        """Render another emoji token inside a modifier-specific wrapper div."""
        wrapper = self.soup.new_tag('div', attrs={'class': f'marseyfx-modifier-{self.ctx().name}-other'})
        if other is None:
            return wrapper
        other = other.wrap()
        other_emoji = parser.parse_from_token(self.tokenizer, other)
        if other_emoji is None:
            return wrapper
        other_emoji.is_primary = False
        # NOTE(review): parser's Emoji.create_el now appears to return a
        # (tag, heavy_count) tuple -- confirm this call site unpacks it.
        return other_emoji.create_el(self.tokenizer).wrap(wrapper)

    def wrap_child(self, class_: str = ''):
        """Wrap the child in another -wrapper div (numbered when nested)."""
        ctx = self.ctx()
        wrap_insert = ''
        if ctx.wrap_depth > 0:
            wrap_insert = f'-{ctx.wrap_depth + 1}'
        self.child = self.child.wrap(self.soup.new_tag('div', attrs={'class': f'marseyfx-modifier-{self.ctx().name}-wrapper{wrap_insert} {class_}'}))
        ctx.wrap_depth += 1

    @modifier
    def pat(self):
        self.overlay(self.image('hand'))

    @modifier
    def love(self):
        self.overlay(self.image('love-foreground'))
        self.underlay(self.image('love-background'))

    @modifier
    def talking(self):
        self.overlay(self.image('talking'))

    @modifier
    def genocide(self):
        # Pure CSS effect: the @modifier wrapper classes do all the work.
        pass

    @modifier
    def party(self):
        # Pure CSS effect: the @modifier wrapper classes do all the work.
        pass

    @modifier
    def says(self, msg):
        """Attach a speech bubble containing the given string literal."""
        if not isinstance(msg, StringLiteralToken):
            return
        container = self.soup.new_tag(
            'div',
            attrs={'class': 'marseyfx-modifier-says-container'}
        )
        self.container.append(container)

        container.append(self.soup.new_tag(
            'div',
            attrs={'class': 'marseyfx-modifier-says-nub'}
        ))

        tag = self.soup.new_tag(
            'span',
            attrs={'class': 'marseyfx-modifier-says-text'}
        )
        tag.string = msg.value
        container.append(tag)

    @modifier
    def fallover(self):
        self.container = self.container.wrap(self.soup.new_tag(
            'div',
            attrs={'class': 'marseyfx-modifier-fallover-container'}
        ))

    @modifier
    def transform(self, transformstyle: StringLiteralToken):
        """Apply a user-supplied CSS transform, rejecting unsafe characters."""
        if not re.fullmatch(r'[\w()\s%\.,]*', transformstyle.value):
            print(f'Evil transform detected: {transformstyle.value}')
            return
        self.add_style(f'transform: {transformstyle.value};')

    @heavy
    @modifier
    def enraged(self):
        self.underlay(self.soup.new_tag(
            'div',
            attrs={'class': 'marseyfx-modifier-enraged-underlay'}
        ))

    @modifier
    def meme(self, toptext: Optional[StringLiteralToken] = None, bottomtext: Optional[StringLiteralToken] = None):
        """Overlay impact-style top/bottom meme captions."""
        if isinstance(toptext, StringLiteralToken):
            self.meme_text(toptext.value, 'toptext')
        if isinstance(bottomtext, StringLiteralToken):
            self.meme_text(bottomtext.value, 'bottomtext')

    def bottomtext(self, text: StringLiteralToken):
        # Not decorated with @modifier (and so not whitelisted on its own).
        if not isinstance(text, StringLiteralToken):
            return
        tag = self.soup.new_tag(
            'span',
            attrs={'class': 'marseyfx-modifier-bottomtext-text'}
        )
        tag.string = text.value
        self.overlay(tag)

    @modifier
    def spin(self, speed=None):
        """Spin the emoji; higher numeric speed = shorter animation period."""
        if not isinstance(speed, NumberLiteralToken):
            return
        if speed.value == 0:
            # Fix: 1/0 previously raised ZeroDivisionError on `.spin(0)`.
            return
        self.add_style(f'animation-duration: {1/speed.value}s;')

    @modifier
    def triumphs(self, other: GroupToken):
        """Render this emoji standing over another, defeated emoji."""
        other = other.wrap()
        other_emoji = parser.parse_from_token(self.tokenizer, other)
        print(f'Other emoji: {other_emoji} / Token: {other}')
        if other_emoji is None:
            return
        self.add_child_class('marseyfx-modifier-triumphs-self')
        other_emoji.is_primary = False
        # NOTE(review): see create_other -- create_el may now return a tuple.
        other = other_emoji.create_el(self.tokenizer).wrap(
            self.soup.new_tag('div', attrs={'class': 'marseyfx-modifier-triumphs-other'})
        )
        self.underlay(other)

    @modifier
    def nested(self, inside: GroupToken):
        """Sandwich another emoji between two copies of this one."""
        inside = inside.wrap()
        inside_emoji = parser.parse_from_token(self.tokenizer, inside)
        if inside_emoji is None:
            return
        inside_emoji.is_primary = False
        # NOTE(review): see create_other -- create_el may now return a tuple.
        inside = inside_emoji.create_el(self.tokenizer).wrap(
            self.soup.new_tag('div', attrs={'class': 'marseyfx-modifier-nested-other'})
        )
        self.underlay(inside)
        self.add_child_class('marseyfx-modifier-nested-side')
        child = self.child
        self.child = child.wrap(self.soup.new_tag('div', attrs={'class': 'marseyfx-modifier-nested-outer-container'}))
        other_side = copy.copy(child)
        self.child.append(other_side)

    @modifier
    def morph(self, other: GroupToken):
        """Animate this emoji morphing into another."""
        self.add_child_class('marseyfx-modifier-morph-self')
        other = other.wrap()
        other_emoji = parser.parse_from_token(self.tokenizer, other)
        if other_emoji is None:
            return
        other_emoji.is_primary = False
        # NOTE(review): see create_other -- create_el may now return a tuple.
        other = other_emoji.create_el(self.tokenizer).wrap(
            self.soup.new_tag('div', attrs={'class': 'marseyfx-modifier-morph-other'})
        )
        self.container.append(other)

    @heavy
    @modifier
    def bulge(self, strength: NumberLiteralToken = None):
        # `strength` is currently unused; kept for interface compatibility.
        self.child = self.child.wrap(self.soup.new_tag('svg', attrs={'class': 'marseyfx-modifier-bulge-container'}))

    @modifier
    def prohibition(self):
        self.overlay(self.image('prohibition.svg'))

    @modifier
    def scope(self):
        self.overlay(self.image('scope.svg'))
        self.add_child_class('marseyfx-modifier-scope-target')

    @modifier
    def fucks(self, other: GroupToken):
        other = self.create_other(other)
        self.container.append(other)

    @heavy
    @modifier
    def glow(self):
        # Pure CSS effect: the @modifier wrapper classes do all the work.
        pass

    @heavy
    @modifier
    def echo(self):
        """Append three fading clones of the child element."""
        for i in range(1, 4):
            tag = copy.copy(self.child)
            # Copy the class list so clones don't share (and mutate) it.
            tag.attrs['class'] = tag.attrs['class'].copy()
            tag.attrs['class'].append(f'marseyfx-modifier-echo-clone marseyfx-modifier-echo-clone-{i}')
            self.container.append(tag)

    @modifier
    def rentfree(self):
        self.wrap_child()
        self.overlay(self.image('rentfree.png'))

View File

@ -1,145 +1,162 @@
import random
from tokenize import Token
from bs4 import BeautifulSoup
from files.helpers.config.const import SITE_FULL_IMAGES
from files.helpers.config.const import EMOJI_KINDS, SITE_FULL_IMAGES
from files.helpers.get import get_user
from files.helpers.marseyfx.tokenizer import ArgsToken, DotToken, GroupToken, NumberLiteralToken, Tokenizer, WordToken
from files.helpers.marseyfx.modifiers import Modified, Modifier, modifier_whitelist
from sqlalchemy.sql import func
# Prefix symbols stripped from an emoji name and the Emoji flag each one sets.
emoji_replacers = {
    '!': 'is_flipped',
    '#': 'is_big',
    '@': 'is_user',
}
class Emoji:
    """A parsed marseyfx emoji plus its modifier chain."""
    name: str
    token: Token
    is_big = False
    is_flipped = False
    is_user = False
    modifiers: list[Modifier]
    is_primary = True
    is_golden = False

    def __init__(self, name: str, modifiers, token: Token, **args):
        # Strip !/#/@ prefix symbols and set the corresponding flags.
        for symbol, value in emoji_replacers.items():
            if symbol in name:
                name = name.replace(symbol, '')
                setattr(self, value, True)

        # '<kind>random' picks a random emoji of that kind from the DB.
        if name.endswith('random'):
            kind = name.split('random')[0].title()
            if kind == 'Donkeykong':
                kind = 'Donkey Kong'
            elif kind == 'Marseyflag':
                kind = 'Marsey Flags'
            elif kind == 'Marseyalphabet':
                kind = 'Marsey Alphabet'
            if kind in EMOJI_KINDS:
                # NOTE(review): `Emoji` here is this class, shadowing the DB
                # model of the same name -- confirm the query targets the
                # intended model.
                name = g.db.query(Emoji.name).filter_by(kind=kind).order_by(func.random()).first()[0]

        self.name = name
        self.modifiers = modifiers
        self.token = token
        self.is_primary = args.get('is_primary', True)
        # ~0.4% chance of a golden variant.
        if random.random() < 0.004:
            self.is_golden = True

    def create_el(self, tokenizer: Tokenizer):
        """Build the HTML element for this emoji.

        Returns a ``(tag, heavy_count)`` tuple, where heavy_count is the
        number of @heavy modifiers that ran.
        """
        soup = BeautifulSoup()
        if self.is_user:
            # @name renders the user's profile picture instead of an emoji.
            user = get_user(self.name, graceful=True)
            src = f'/pp/{user.id}' if user else None
            el = soup.new_tag(
                'img',
                loading='lazy',
                src=src,
                attrs={'class': 'marseyfx-emoji marseyfx-image marseyfx-user'}
            )
        else:
            el = soup.new_tag(
                'img',
                loading='lazy',
                src=f'{SITE_FULL_IMAGES}/e/{self.name}.webp',
                attrs={'class': 'marseyfx-emoji marseyfx-image'}
            )

        if self.is_golden:
            el['class'].append(' golden')

        soup.append(el)
        el = el.wrap(
            soup.new_tag('div', attrs={
                'class': 'marseyfx-emoji-container'
            })
        )

        mod = Modified(el, tokenizer)
        mod.apply_modifiers(self.modifiers)

        container_attrs = {
            'class': 'marseyfx-container',
        }
        if self.is_primary:
            # Only the outermost emoji gets the source-text tooltip.
            container_attrs |= {
                'data-bs-toggle': 'tooltip',
                'title': tokenizer.str
            }
        container = soup.new_tag('div', attrs=container_attrs)

        if self.is_big:
            container['class'].append(' marseyfx-big')
        if self.is_flipped:
            container['class'].append(' marseyfx-flipped')

        return mod.container.wrap(container), mod.heavy_count
def parse_emoji(tokenizer: Tokenizer):
    """Parse one emoji expression from the tokenizer.

    Returns a ``(ok, emoji, token)`` triple; on failure ``ok`` is False and
    ``emoji`` is None.  The leftover debug ``print`` has been removed.
    """
    token = tokenizer.parse_next_tokens()

    if len(tokenizer.errors) > 0 or token is None:
        return False, None, token

    emoji = parse_from_token(tokenizer, token)

    if not emoji:
        return False, None, token

    return True, emoji, token
def parse_from_token(tokenizer: Tokenizer, token: GroupToken):
    """Build an Emoji from a parsed GroupToken.

    Reports an error on the tokenizer and returns None for malformed input.
    """
    if not isinstance(token, GroupToken):
        tokenizer.error('Malformed token -- Expected a group token')
        return

    emoji = token.children[0]
    if not isinstance(emoji, (WordToken, NumberLiteralToken)):
        tokenizer.error('Malformed token -- Expected an emoji (word token) or number literal token')
        return

    # Children look like: emoji (.name(args)?)* -- walk dot/name[/args] runs.
    modifiers = []
    i = 1
    while i + 1 < len(token.children):
        dot = token.children[i]
        if not isinstance(dot, DotToken):
            tokenizer.error('Malformed token -- Expected a dot')
            return

        mod_name = token.children[i + 1]
        if not isinstance(mod_name, WordToken):
            tokenizer.error('Malformed token -- Expected a modifier name (word token)')
            return
        if mod_name.value not in modifier_whitelist:
            tokenizer.error(f'Unknown modifier: {mod_name.value}')
            return

        has_args = i + 2 < len(token.children) and isinstance(token.children[i + 2], ArgsToken)
        if has_args:
            modifiers.append(Modifier(mod_name.value, token.children[i + 2].children))
            i += 3
        else:
            modifiers.append(Modifier(mod_name.value, []))
            i += 2

    return Emoji(tokenizer.str[emoji.span[0]:emoji.span[1]], modifiers, token)

View File

@ -2,235 +2,235 @@ from abc import abstractmethod
import re
class TokenizerError:
    """A parse error with enough context to render a caret diagnostic."""
    index: int  # character offset of the error in the source string
    error: str  # human-readable message

    def __init__(self, tokenizer, index: int, error: str):
        self.tokenizer = tokenizer
        self.index = index
        self.error = error

    def __str__(self):
        # Message, then the source line, then a caret under the bad column.
        return f'{self.error}\n {self.tokenizer.str}\n {" " * self.index}^'
class Tokenizer:
str: str
index: int
errors: list[TokenizerError]
str: str
index: int
errors: list[TokenizerError]
def __init__(self, str: str):
self.str = str
self.index = 0
self.errors = []
def __init__(self, str: str):
self.str = str
self.index = 0
self.errors = []
def has_next(self):
return self.index < len(self.str)
def has_next(self):
return self.index < len(self.str)
def peek(self):
if not self.has_next():
self.error('Unexpected end of input')
return None
return self.str[self.index]
def eat(self):
c = self.peek()
self.index += 1
return c
def barf(self):
self.index -= 1
def error(self, error: str):
self.errors.append(TokenizerError(self, self.index, error))
def peek(self):
if not self.has_next():
self.error('Unexpected end of input')
return None
return self.str[self.index]
def eat(self):
c = self.peek()
self.index += 1
return c
def barf(self):
self.index -= 1
def error(self, error: str):
self.errors.append(TokenizerError(self, self.index, error))
def token_to_string(self, token):
return self.str[token.span[0]:token.span[1]]
def token_to_string(self, token):
return self.str[token.span[0]:token.span[1]]
def parse_next_tokens(self):
start = self.index
tokens = []
while self.has_next():
if self.peek() == ' ':
self.eat()
elif NumberLiteralToken.can_parse(self):
tokens.append(NumberLiteralToken.parse(self))
elif WordToken.can_parse(self):
tokens.append(WordToken.parse(self))
elif DotToken.can_parse(self):
tokens.append(DotToken.parse(self))
elif ArgsToken.can_parse(self):
tokens.append(ArgsToken.parse(self))
elif StringLiteralToken.can_parse(self):
tokens.append(StringLiteralToken.parse(self))
else:
break
def parse_next_tokens(self):
start = self.index
tokens = []
while self.has_next():
if self.peek() == ' ':
self.eat()
elif NumberLiteralToken.can_parse(self):
tokens.append(NumberLiteralToken.parse(self))
elif WordToken.can_parse(self):
tokens.append(WordToken.parse(self))
elif DotToken.can_parse(self):
tokens.append(DotToken.parse(self))
elif ArgsToken.can_parse(self):
tokens.append(ArgsToken.parse(self))
elif StringLiteralToken.can_parse(self):
tokens.append(StringLiteralToken.parse(self))
else:
break
if len(tokens) == 0:
self.error('Expected a token')
return None
if len(tokens) == 0:
self.error('Expected a token')
return None
return GroupToken((start, self.index), tokens)
return GroupToken((start, self.index), tokens)
class Token:
    """Base class for all marseyfx tokens; tracks a (start, end) span."""
    span: tuple[int, int]

    def wrap(self):
        """Return self as a GroupToken (no-op if it already is one)."""
        if isinstance(self, GroupToken):
            return self
        return GroupToken(self.span, [self])

    @staticmethod
    @abstractmethod
    def can_parse(tokenizer: Tokenizer) -> bool:
        pass

    @staticmethod
    @abstractmethod
    def parse(tokenizer: Tokenizer):
        pass
class WordToken(Token):
    """A run of word characters (letters, digits, or the !#@ prefix symbols)."""
    value: str

    def __init__(self, span: tuple[int, int], value: str):
        self.value = value
        self.span = span

    @staticmethod
    def can_parse(tokenizer: Tokenizer):
        return re.fullmatch(r'[!#@a-zA-Z]', tokenizer.peek())

    @staticmethod
    def parse(tokenizer: Tokenizer):
        begin = tokenizer.index
        chars = []
        # Digits are allowed after the first character.
        while tokenizer.has_next() and re.fullmatch(r'[!#@a-zA-Z\d]', tokenizer.peek()):
            chars.append(tokenizer.eat())
        return WordToken((begin, tokenizer.index), ''.join(chars))
class StringLiteralToken(Token):
    """A double-quoted string literal with backslash escapes."""
    value: str

    def __init__(self, span: tuple[int, int], value: str):
        self.value = value
        self.span = span

    @staticmethod
    def can_parse(tokenizer: Tokenizer):
        return tokenizer.peek() == '"'

    @staticmethod
    def parse(tokenizer: Tokenizer):
        begin = tokenizer.index
        tokenizer.eat()  # opening quote
        chars = []
        escaped = False
        while tokenizer.has_next():
            ch = tokenizer.peek()
            if ch == '"' and not escaped:
                tokenizer.eat()  # closing quote
                break
            if ch == '\\' and not escaped:
                # Backslash escapes the next character.
                escaped = True
                tokenizer.eat()
            else:
                chars.append(tokenizer.eat())
                escaped = False
        return StringLiteralToken((begin, tokenizer.index), ''.join(chars))
class NumberLiteralToken(Token):
    """A numeric literal (digits, '-', '.'), stored as a float."""
    value: float

    def __init__(self, span: tuple[int, int], value: float):
        self.value = value
        self.span = span

    @staticmethod
    def can_parse(tokenizer: Tokenizer):
        # A number starts with a digit or a minus sign.
        return re.fullmatch(r'[-\d]', tokenizer.peek())

    @staticmethod
    def can_parse_next(tokenizer: Tokenizer):
        # Subsequent characters may also include a decimal point.
        return re.fullmatch(r'[-\d\.]', tokenizer.peek())

    @staticmethod
    def parse(tokenizer: Tokenizer):
        begin = tokenizer.index
        digits = []
        while tokenizer.has_next() and NumberLiteralToken.can_parse_next(tokenizer):
            digits.append(tokenizer.eat())

        try:
            number = float(''.join(digits))
        except ValueError:
            tokenizer.error('Invalid number literal')
            number = 0.0

        return NumberLiteralToken((begin, tokenizer.index), number)

    def get_float(self):
        """Return the value as a float (it is already stored as one)."""
        return float(self.value)
class DotToken(Token):
    """The '.' separator between an emoji and each of its modifiers."""

    def __init__(self, span: tuple[int, int]):
        self.span = span

    @staticmethod
    def can_parse(tokenizer: Tokenizer):
        return tokenizer.peek() == '.'

    @staticmethod
    def parse(tokenizer: Tokenizer):
        start = tokenizer.index
        tokenizer.eat()
        # Fix: the span now covers the consumed '.'; previously it was built
        # *after* eating as (index, index + 1), one character past the dot.
        return DotToken((start, tokenizer.index))
class GroupToken(Token):
    """A run of adjacent tokens treated as one unit."""
    children: list[Token]

    def __init__(self, span: tuple[int, int], children: list[Token]):
        self.children = children
        # this span is probably wrong tbh but idc
        self.span = span

    def unwrap(self):
        """Collapse a single-child group down to its only child."""
        return self.children[0] if len(self.children) == 1 else self
class ArgsToken(Token):
    """A parenthesized, comma-separated list of group tokens."""
    children: list[GroupToken]

    def __init__(self, span: tuple[int, int], children: list[Token]):
        self.children = children
        self.span = span

    @staticmethod
    def can_parse(tokenizer: Tokenizer):
        return tokenizer.peek() == '('

    @staticmethod
    def parse(tokenizer: Tokenizer):
        begin = tokenizer.index
        groups = []
        while tokenizer.has_next():
            # Swallow the opening paren when we are sitting on it.
            if tokenizer.peek() == '(':
                tokenizer.eat()
            if tokenizer.peek() == ')':
                tokenizer.eat()
                break
            elif tokenizer.peek() == ',':
                tokenizer.eat()
            else:
                groups.append(tokenizer.parse_next_tokens())
        return ArgsToken((begin, tokenizer.index), groups)

View File

@ -1,9 +1,11 @@
import copy
import functools
import random
import re
import signal
from functools import partial
from os import path, listdir
from typing import Union
from typing_extensions import deprecated
from urllib.parse import parse_qs, urlparse, unquote, ParseResult, urlencode, urlunparse
import time
@ -29,6 +31,8 @@ from files.helpers.const_stateful import *
from files.helpers.regex import *
from files.helpers.get import *
from bs4 import Tag
TLDS = ( # Original gTLDs and ccTLDs
'ac','ad','ae','aero','af','ag','ai','al','am','an','ao','aq','ar','arpa','as','asia','at',
'au','aw','ax','az','ba','bb','bd','be','bf','bg','bh','bi','biz','bj','bm','bn','bo','br',
@ -272,7 +276,7 @@ def find_all_emote_endings(word):
class RenderEmojisResult:
emojis_used: set[str]
heavy_count = 0
tags: list[str]
tags: list[Union[str, Tag]]
def __init__(self):
self.emojis_used = set()
@ -283,11 +287,33 @@ class RenderEmojisResult:
self.heavy_count += other.heavy_count
self.tags.extend(other.tags)
def render_emojis(markup: str, **kwargs):
def db_update_count(self):
    # Bump the usage counter for every built-in emoji (submitter_id is
    # NULL) whose name was rendered in this result, and stage the updated
    # rows on the session. The caller is responsible for committing.
    for emoji in g.db.query(Emoji).filter(Emoji.submitter_id==None, Emoji.name.in_(self.emojis_used)):
        emoji.count += 1
        g.db.add(emoji)
def render_emojis_tag(tag: Tag, **kwargs):
    """Render emojis inside every text node of an existing soup ``Tag``.

    Text inside <code>/<pre> is left untouched. Returns a
    RenderEmojisResult whose ``tags`` holds the (copied) modified tag.
    """
    result = RenderEmojisResult()
    # Work on a copy so the caller's tree is not mutated.
    tag = copy.copy(tag)
    for text_el in tag.find_all(text=True):
        if not text_el.parent or text_el.parent.name in {'code', 'pre'}:
            continue
        # Bug fix: forward the rendering options (golden/permit_golden/
        # permit_big); previously they were accepted but silently dropped.
        res = render_emojis(text_el.text, **kwargs)
        text_el.replace_with(*res.tags)
        result.update(res)
    result.tags = [tag]
    return result
def render_emojis(markup: Union[str, Tag], **kwargs):
if isinstance(markup, Tag):
return render_emojis_tag(markup, **kwargs)
result = RenderEmojisResult()
last_match_end = 0
golden = kwargs.get('golden', True)
permit_golden = kwargs.get('permit_golden', True)
permit_big = kwargs.get('permit_big', True)
for emoji_match in marseyfx_emoji_regex.finditer(markup):
@ -306,8 +332,12 @@ def render_emojis(markup: str, **kwargs):
if not permit_big:
emoji.is_big = False
emoji_html = emoji.create_el(tokenizer)
if not permit_golden:
emoji.is_golden = False
emoji_html, heavy_count = emoji.create_el(tokenizer)
result.tags.append(emoji_html)
result.heavy_count += heavy_count
if len(tokenizer.errors) > 0:
soup = BeautifulSoup()
@ -595,19 +625,14 @@ def sanitize(sanitized, golden=True, limit_pings=0, showmore=False, count_emojis
soup = BeautifulSoup(sanitized, 'lxml')
# -- EMOJI RENDERING --
emojis_used = set()
emoji_render = render_emojis(soup)
soup = emoji_render.tags[0]
for text_el in soup.find_all(text=True):
if not text_el.parent or text_el.parent.name in {'code', 'pre'}:
continue
res = render_emojis(text_el.text)
text_el.replace_with(*res.tags)
emojis_used.update(res.emojis_used)
if emoji_render.heavy_count > 5:
error("Too many heavy emojis! (Max 5)")
if count_emojis:
for emoji in g.db.query(Emoji).filter(Emoji.submitter_id==None, Emoji.name.in_(emojis_used)):
emoji.count += 1
g.db.add(emoji)
emoji_render.db_update_count()
# -- @ MENTIONS --
ping_count = 0
@ -770,32 +795,32 @@ def allowed_attributes_emojis(tag, name, value):
if name == 'cide' and not value: return True
return False
@with_sigalrm_timeout(2)
def filter_emojis_only(title, golden=True, count_emojis=False):
# XSS warning: do not allow any html tags, otherwise someone could do something like this:
# `<img src=":marsey: evilShit">` because when :marsey: is rendered, it will include quotes that
# will end the attribute and allow someone to inject an evil attribute like onerror
title = title.replace("\n", "").replace("\r", "").replace("\t", "").replace('<','&lt;').replace('>','&gt;')
title = remove_cuniform(title)
res = render_emojis(title) #old_render_emoji(title, emoji_regex2, golden, emojis_used, is_title=True)
title = strikethrough_regex.sub(r'\1<del>\2</del>', title)
title = bleach.clean(title, tags=['img','del','span'], attributes=allowed_attributes_emojis, protocols=['http','https']).replace('\n','')
res = render_emojis(title, permit_big=False) #old_render_emoji(title, emoji_regex2, golden, emojis_used, is_title=True)
if res.heavy_count > 0:
abort(400, "You can't have heavy/filter emojis in the title!")
title = ''.join(map(str, res.tags))
if count_emojis:
for emoji in g.db.query(Emoji).filter(Emoji.submitter_id==None, Emoji.name.in_(emojis_used)):
emoji.count += 1
g.db.add(emoji)
res.db_update_count()
title = strikethrough_regex.sub(r'\1<del>\2</del>', title)
title = bleach.clean(title, tags=['img','del','span'], attributes=allowed_attributes_emojis, protocols=['http','https']).replace('\n','')
title = title.strip()
if len(title) > POST_TITLE_HTML_LENGTH_LIMIT:
abort(400, "Rendered title is too big!")
title = title.strip()
return title
def is_whitelisted(domain, k):