Wrote a lot of MarseyFX
parent
67e531c4e4
commit
25a33b5538
|
@ -0,0 +1,107 @@
|
|||
from bs4 import BeautifulSoup, Tag
|
||||
from files.helpers.config.const import SITE_FULL_IMAGES
|
||||
from files.helpers.marseyfx.parser import Modifier
|
||||
from files.helpers.marseyfx.tokenizer import StringLiteralToken
|
||||
|
||||
# Names of methods that untrusted input is allowed to invoke via apply_modifiers.
# Populated by the @modifier decorator below.
modifier_whitelist = []


def modifier(fn):
    """Register *fn* as a user-invokable modifier.

    Adds the function name to the whitelist and wraps it so that every call
    tags the target element with a `marseyfx-modifier-<name>` CSS class
    before running the modifier body.
    """
    from functools import wraps

    modifier_whitelist.append(fn.__name__)

    @wraps(fn)  # keep fn's __name__/__doc__ on the wrapper (it was 'wrapper' before)
    def wrapper(*args, **kwargs):
        # args[0] is the Modified instance carrying the element being decorated
        args[0].el['class'].append('marseyfx-modifier-' + fn.__name__)
        return fn(*args, **kwargs)

    return wrapper
|
||||
|
||||
class Modified:
    """An emoji element plus the chain of marseyfx modifiers applied to it.

    Wraps the original element in a `marseyfx-container` div; each modifier
    method mutates the wrapped tree (overlays, underlays, CSS classes).
    """
    soup: BeautifulSoup
    el: Tag  # the container element everything is applied to

    def __init__(self, el):
        self.soup = BeautifulSoup()
        # attrs= (not class_=) so the attribute is genuinely named 'class',
        # and a list so @modifier/add_class can append further classes.
        self.el = el.wrap(self.soup.new_tag(
            'div', attrs={'class': ['marseyfx-container']}
        ))

    def add_class(self, class_: str):
        """Append one CSS class to the container."""
        # 'class' is a list — append the bare name (no space-padding needed)
        self.el.attrs['class'].append(class_)

    def apply_modifiers(self, modifiers: list[Modifier]):
        """Run each whitelisted modifier by name with its parsed arguments."""
        for modifier in modifiers:
            if modifier.name in modifier_whitelist:
                getattr(self, modifier.name)(*modifier.args)

    # Building tags programmatically instead of formatting an HTML string
    # helps mitigate the risk of XSS attacks.
    def image(self, name: str):
        """Return a lazy-loading <img> for the site asset *name*."""
        return self.soup.new_tag(
            'img',
            loading='lazy',
            src=f'{SITE_FULL_IMAGES}/i/{name}.webp',
            attrs={'class': f'marseyfx-{name}'},
        )

    def underlay(self, underlay: Tag):
        """Insert *underlay* beneath everything else in the container."""
        self.el.insert(0, underlay)

    def overlay(self, overlay: Tag):
        """Append *overlay* on top of the container's children."""
        self.el.append(overlay)

    @modifier
    def pat(self):
        # overlay/underlay take a single Tag; passing self.el too was a TypeError
        self.overlay(self.image('pat'))

    @modifier
    def love(self):
        self.overlay(self.image('love-foreground'))
        self.underlay(self.image('love-background'))

    @modifier
    def talking(self):
        self.overlay(self.image('talking'))

    @modifier
    def genocide(self):
        # presumably styled purely in CSS via the class @modifier adds — confirm
        pass

    @modifier
    def says(self, msg):
        if not isinstance(msg, StringLiteralToken):
            return

        self.overlay(self.image('says'))
        # set the text through .string rather than a new_tag kwarg, which would
        # have become a literal string="..." attribute
        text = self.soup.new_tag(
            'span', attrs={'class': 'marseyfx-modifier-says-text'}
        )
        text.string = msg.value
        self.el.append(text)

    @modifier
    def fallover(self):
        self.el.wrap(self.soup.new_tag(
            'div',
            attrs={'class': 'marseyfx-modifier-fallover-container'},
        ))

    @modifier
    def transform(self, transformstyle: str):
        """Append a caller-supplied CSS transform, restricted to safe characters."""
        import re

        # str has no .fullmatch — match the whole value against the whitelist.
        # NOTE(review): parsed modifier args arrive as tokens elsewhere; confirm
        # callers pass a plain string here.
        if not re.fullmatch(r'[\w()\s%\.]*', transformstyle):
            return

        if 'style' not in self.el.attrs:
            self.el.attrs['style'] = ''

        self.el.attrs['style'] += f'transform: {transformstyle};'

    @modifier
    def enraged(self):
        self.underlay(self.soup.new_tag(
            'div',
            attrs={'class': 'marseyfx-enraged-underlay'},
        ))

    @modifier
    def corrupted(self):
        # presumably styled purely in CSS via the class @modifier adds — confirm
        pass

    @modifier
    def wavy(self):
        self.el.wrap(self.soup.new_tag('svg'))
|
|
@ -0,0 +1,107 @@
|
|||
from tokenize import Token
|
||||
|
||||
from bs4 import BeautifulSoup
|
||||
from files.helpers.config.const import SITE_FULL_IMAGES
|
||||
from files.helpers.marseyfx.tokenizer import ArgsToken, DotToken, GroupToken, Tokenizer, WordToken
|
||||
from modified import Modified
|
||||
|
||||
class Modifier:
    """One parsed modifier invocation, e.g. `.pat` or `.says("hi")`.

    `name` is the modifier method to call; `args` holds the parsed argument
    tokens passed through to it.
    """
    name: str
    # Quoted as a forward reference: `Token` is imported at module top from the
    # wrong place (stdlib `tokenize` has no `Token`), so avoid evaluating it here.
    args: "list[Token]"

    def __init__(self, name: str, args: "list[Token]"):
        self.name = name
        self.args = args
|
||||
|
||||
# Single-character flags that may appear anywhere in an emoji name,
# mapped to the Emoji attribute they switch on.
emoji_replacers = {
    '!': 'is_flipped',
    '#': 'is_big',
    '@': 'is_user',
}


class Emoji:
    """A parsed marseyfx emoji: base name, flag attributes, and modifier chain."""
    name: str
    token: "Token"  # forward ref: Token's import is resolved elsewhere
    is_big = False
    is_flipped = False
    is_user = False
    modifiers: "list[Modifier]"

    def __init__(self, name: str, modifiers, token: "Token"):
        for symbol, attr in emoji_replacers.items():
            # Only set the flag when its symbol is actually present —
            # previously every flag was set unconditionally for every emoji.
            if symbol in name:
                name = name.replace(symbol, '')
                setattr(self, attr, True)

        self.name = name
        self.modifiers = modifiers
        self.token = token

    def create_el(self):
        """Render this emoji as an <img> wrapped by its modifiers' containers."""
        soup = BeautifulSoup()

        el = soup.new_tag(
            'img',
            loading='lazy',
            src=f'{SITE_FULL_IMAGES}/e/{self.name}.webp',
            # attrs= so the attribute is genuinely named 'class'; a list so the
            # flag classes below can be appended
            attrs={'class': ['marseyfx-emoji']},
        )

        if self.is_big:
            el['class'].append('marseyfx-big')

        if self.is_flipped:
            el['class'].append('marseyfx-flipped')

        mod = Modified(el)
        mod.apply_modifiers(self.modifiers)

        return mod.el
|
||||
|
||||
def parse_emoji(str: str):  # param name kept for caller compatibility; shadows builtin
    """Parse an emoji body like `marsey.pat.says("hi")`.

    Returns a 3-tuple `(success, emoji, token)` where `emoji` is an Emoji on
    success and None on failure, and `token` is the raw parsed GroupToken
    (useful for error reporting either way).
    """
    tokenizer = Tokenizer(str)
    token = tokenizer.parse_next_tokens()

    if len(tokenizer.errors) > 0:
        return False, None, token

    emoji = parse_from_token(tokenizer, token)

    # parse_from_token can record additional errors; treat those as failure too
    if not emoji or len(tokenizer.errors) > 0:
        return False, None, token

    return True, emoji, token
|
||||
|
||||
def parse_from_token(tokenizer: Tokenizer, token: GroupToken):
    """Build an Emoji from a GroupToken shaped `<word>(.<modifier>[(args)])*`.

    Records an error on *tokenizer* and returns None on malformed input.
    """
    if not isinstance(token, GroupToken):
        tokenizer.error('Malformed token -- Expected a group token')
        return

    # GroupToken stores its parsed tokens in .children (reading .tokens
    # raised AttributeError)
    children = token.children

    if not children:
        tokenizer.error('Malformed token -- Expected an emoji (word token)')
        return

    emoji = children[0]
    if not isinstance(emoji, WordToken):
        tokenizer.error('Malformed token -- Expected an emoji (word token)')
        return

    modifiers = []

    i = 1
    while i + 1 < len(children):
        t = children[i]
        if not isinstance(t, DotToken):
            tokenizer.error('Malformed token -- Expected a dot')
            return

        modifier = children[i + 1]
        if not isinstance(modifier, WordToken):
            tokenizer.error('Malformed token -- Expected a modifier name (word token)')
            return

        if not i + 2 < len(children) or not isinstance(children[i + 2], ArgsToken):
            modifiers.append(Modifier(modifier.value, []))
            i += 2
        else:
            args = children[i + 2]
            # ArgsToken stores one GroupToken per argument; unwrap single-token
            # groups so modifiers receive the literal token directly (says()
            # checks isinstance(..., StringLiteralToken)).
            arg_tokens = [
                g.children[0] if len(g.children) == 1 else g
                for g in args.children
            ]
            modifiers.append(Modifier(modifier.value, arg_tokens))
            i += 3

    return Emoji(emoji.value, modifiers, token)
|
|
@ -0,0 +1,198 @@
|
|||
from abc import abstractmethod
import re


class TokenizerError:
    """A parse error recorded at a specific cursor position."""
    index: int
    error: str

    def __init__(self, index: int, error: str):
        self.index = index
        self.error = error


class Tokenizer:
    """Character-cursor tokenizer for marseyfx emoji strings."""
    str: str
    index: int
    errors: "list[TokenizerError]"

    def __init__(self, str: str):  # param name kept for caller compat; shadows builtin
        self.str = str
        self.index = 0
        self.errors = []

    def has_next(self):
        """True while there are unread characters."""
        return self.index < len(self.str)

    def peek(self):
        """Return the current character without consuming it."""
        return self.str[self.index]

    def eat(self):
        """Consume and return the current character."""
        c = self.peek()
        self.index += 1
        return c

    def barf(self):
        """Push the most recently eaten character back."""
        self.index -= 1

    def error(self, error: str):
        """Record *error* at the current cursor position."""
        self.errors.append(TokenizerError(self.index, error))

    def token_to_string(self, token):
        """Return the source slice a token's span covers."""
        return self.str[token.span[0]:token.span[1]]

    def parse_next_tokens(self):
        """Parse a run of tokens up to the next unrecognized character.

        String and number literals are dispatched too — previously they were
        never reached, so modifier arguments like says("hi") could not parse
        (and ArgsToken.parse would spin without making progress).
        """
        start = self.index
        tokens = []
        while self.has_next():
            if WordToken.can_parse(self):
                tokens.append(WordToken.parse(self))
            elif DotToken.can_parse(self):
                tokens.append(DotToken.parse(self))
            elif StringLiteralToken.can_parse(self):
                tokens.append(StringLiteralToken.parse(self))
            elif NumberLiteralToken.can_parse(self):
                tokens.append(NumberLiteralToken.parse(self))
            elif ArgsToken.can_parse(self):
                tokens.append(ArgsToken.parse(self))
            else:
                break

        return GroupToken((start, self.index), tokens)


class Token:
    """Base class for all tokens; `span` is the (start, end) source range."""
    span: tuple[int, int]

    @staticmethod
    @abstractmethod
    def can_parse(tokenizer: Tokenizer) -> bool:
        """Whether a token of this type starts at the cursor."""
        pass

    @staticmethod
    @abstractmethod
    def parse(tokenizer: Tokenizer):
        """Consume and return one token of this type."""
        pass


class WordToken(Token):
    """A run of word characters, including the !/#/@ emoji flag symbols."""
    value: str

    def __init__(self, span: tuple[int, int], value: str):
        self.value = value
        self.span = span

    @staticmethod
    def can_parse(tokenizer: Tokenizer):
        # str has no .fullmatch — match the single character with re
        return re.fullmatch(r'[!#\w@]', tokenizer.peek()) is not None

    @staticmethod
    def parse(tokenizer: Tokenizer):
        start = tokenizer.index
        value = ''
        while tokenizer.has_next():
            if WordToken.can_parse(tokenizer):
                value += tokenizer.eat()
            else:
                break

        return WordToken((start, tokenizer.index), value)


class StringLiteralToken(Token):
    """A double-quoted string; backslash escapes the next character."""
    value: str

    def __init__(self, span: tuple[int, int], value: str):
        self.value = value
        self.span = span

    @staticmethod
    def can_parse(tokenizer: Tokenizer):
        return tokenizer.peek() == '"'

    @staticmethod
    def parse(tokenizer: Tokenizer):
        start = tokenizer.index
        tokenizer.eat()  # opening quote
        value = ''
        next_escaped = False
        while tokenizer.has_next():
            if tokenizer.peek() == '"' and not next_escaped:
                tokenizer.eat()  # closing quote
                break
            elif tokenizer.peek() == '\\' and not next_escaped:
                next_escaped = True
                tokenizer.eat()
            else:
                value += tokenizer.eat()
                next_escaped = False

        return StringLiteralToken((start, tokenizer.index), value)


class NumberLiteralToken(Token):
    """A numeric literal made of digits, '-' and '.'; value stored as float."""
    value: float

    def __init__(self, span: tuple[int, int], value: float):
        self.value = value
        self.span = span

    @staticmethod
    def can_parse(tokenizer: Tokenizer):
        # str has no .fullmatch — match the single character with re
        return re.fullmatch(r'[-\d\.]', tokenizer.peek()) is not None

    @staticmethod
    def parse(tokenizer: Tokenizer):
        start = tokenizer.index
        value = ''
        while tokenizer.has_next():
            if NumberLiteralToken.can_parse(tokenizer):
                value += tokenizer.eat()
            else:
                break

        try:
            value = float(value)
        except ValueError:
            tokenizer.error('Invalid number literal')
            value = 0.0

        return NumberLiteralToken((start, tokenizer.index), value)

    def get_float(self):
        return float(self.value)


class DotToken(Token):
    """A single '.' separating modifiers."""
    def __init__(self, span: tuple[int, int]):
        self.span = span

    @staticmethod
    def can_parse(tokenizer: Tokenizer):
        return tokenizer.peek() == '.'

    @staticmethod
    def parse(tokenizer: Tokenizer):
        start = tokenizer.index
        tokenizer.eat()
        # span covers the consumed '.' (was recorded one position too late)
        return DotToken((start, tokenizer.index))


class GroupToken(Token):
    """A sequence of sibling tokens parsed as one unit."""
    children: list[Token]

    def __init__(self, span: tuple[int, int], children: list[Token]):
        self.children = children
        self.span = span


class ArgsToken(Token):
    """A parenthesized, comma-separated argument list; one GroupToken per arg."""
    children: list[GroupToken]

    def __init__(self, span: tuple[int, int], children: list[Token]):
        self.children = children
        self.span = span

    @staticmethod
    def can_parse(tokenizer: Tokenizer):
        return tokenizer.peek() == '('

    @staticmethod
    def parse(tokenizer: Tokenizer):
        start = tokenizer.index
        tokenizer.eat()  # consume the opening '(' (it was never eaten before)
        tokens = []
        while tokenizer.has_next():
            if tokenizer.peek() == ')':
                tokenizer.eat()
                break
            elif tokenizer.peek() == ',':
                tokenizer.eat()
            else:
                before = tokenizer.index
                tokens.append(tokenizer.parse_next_tokens())
                if tokenizer.index == before:
                    # no progress means an unparseable character: record it and
                    # skip it instead of looping forever
                    tokenizer.error('Malformed token -- Unexpected character in arguments')
                    tokenizer.eat()

        return ArgsToken((start, tokenizer.index), tokens)
|
|
@ -52,6 +52,8 @@ mute_regex = re.compile("\/mute @?([\w-]{1,30}) ([0-9]+)", flags=re.A|re.I)
|
|||
emoji_regex = re.compile(f"<p>\s*(:[!#@\w\-]{{1,72}}:\s*)+<\/p>", flags=re.A)
|
||||
emoji_regex2 = re.compile(f'(?<!"):([!#@\w\-]{{1,72}}?):(?!([^<]*<\/(code|pre)>|[^`]*`))', flags=re.A)
|
||||
|
||||
marseyfx_emoji_regex = re.compile(':[^\s].{0,98}?[^\\]:', flags=re.A)
|
||||
|
||||
snappy_url_regex = re.compile('<a href="(https?:\/\/.+?)".*?>(.+?)<\/a>', flags=re.A)
|
||||
|
||||
email_regex = re.compile('[A-Za-z0-9._%+-]{1,64}@[A-Za-z0-9.-]{2,63}\.[A-Za-z]{2,63}', flags=re.A)
|
||||
|
|
|
@ -4,8 +4,10 @@ import re
|
|||
import signal
|
||||
from functools import partial
|
||||
from os import path, listdir
|
||||
from typing_extensions import deprecated
|
||||
from urllib.parse import parse_qs, urlparse, unquote, ParseResult, urlencode, urlunparse
|
||||
import time
|
||||
from files.helpers.marseyfx.parser import parse_emoji
|
||||
|
||||
from sqlalchemy.sql import func
|
||||
|
||||
|
@ -271,8 +273,21 @@ def find_all_emote_endings(word):
|
|||
|
||||
return endings, word
|
||||
|
||||
def render_emojis(markup: str):
    """Replace every :emoji[.modifiers]: occurrence in *markup* with rendered HTML.

    Returns (markup, emojis_used) where emojis_used is the set of base emoji
    names that were successfully rendered.
    """
    emojis_used = set()

    for emoji_match in marseyfx_emoji_regex.finditer(markup):
        emoji_str = emoji_match.group()[1:-1]  # cut off the surrounding colons
        # parse_emoji returns a (success, emoji, token) 3-tuple; the raw token
        # is not needed here (the old 2-value unpack raised ValueError)
        success, emoji, _token = parse_emoji(emoji_str)
        if success:
            emojis_used.add(emoji.name)
            emoji_html = str(emoji.create_el())
            markup = markup.replace(emoji_match.group(), emoji_html)

    return markup, emojis_used
|
||||
|
||||
@deprecated("Use the new one")
|
||||
def old_render_emoji(html, regexp, golden, emojis_used, b=False, is_title=False):
|
||||
emojis = list(regexp.finditer(html))
|
||||
captured = set()
|
||||
|
||||
|
@ -529,29 +544,7 @@ def sanitize(sanitized, golden=True, limit_pings=0, showmore=False, count_emojis
|
|||
|
||||
sanitized = spoiler_regex.sub(r'<spoiler>\1</spoiler>', sanitized)
|
||||
|
||||
emojis_used = set()
|
||||
|
||||
emojis = list(emoji_regex.finditer(sanitized))
|
||||
if len(emojis) > 20: golden = False
|
||||
|
||||
captured = []
|
||||
for i in emojis:
|
||||
if i.group(0) in captured: continue
|
||||
captured.append(i.group(0))
|
||||
|
||||
old = i.group(0)
|
||||
if 'marseylong1' in old or 'marseylong2' in old or 'marseylongcockandballs' in old or 'marseyllama1' in old or 'marseyllama2' in old:
|
||||
new = old.lower().replace(">", " class='mb-0'>")
|
||||
else: new = old.lower()
|
||||
|
||||
new = render_emoji(new, emoji_regex2, golden, emojis_used, True)
|
||||
|
||||
sanitized = sanitized.replace(old, new)
|
||||
|
||||
emojis = list(emoji_regex2.finditer(sanitized))
|
||||
if len(emojis) > 20: golden = False
|
||||
|
||||
sanitized = render_emoji(sanitized, emoji_regex2, golden, emojis_used)
|
||||
sanitized, emojis_used = render_emojis(sanitized)  # was `santiized`: the typo silently discarded the rendered output
|
||||
|
||||
sanitized = sanitized.replace('&','&')
|
||||
|
||||
|
@ -710,9 +703,7 @@ def filter_emojis_only(title, golden=True, count_emojis=False):
|
|||
|
||||
title = remove_cuniform(title)
|
||||
|
||||
emojis_used = set()
|
||||
|
||||
title = render_emoji(title, emoji_regex2, golden, emojis_used, is_title=True)
|
||||
title, emojis_used = render_emojis(title) #old_render_emoji(title, emoji_regex2, golden, emojis_used, is_title=True)
|
||||
|
||||
if count_emojis:
|
||||
for emoji in g.db.query(Emoji).filter(Emoji.submitter_id==None, Emoji.name.in_(emojis_used)):
|
||||
|
|
Loading…
Reference in New Issue