diff --git a/files/assets/css/main.css b/files/assets/css/main.css index 70d6ebb0f..1998f093c 100644 --- a/files/assets/css/main.css +++ b/files/assets/css/main.css @@ -5514,8 +5514,7 @@ input[type=radio] ~ .custom-control-label::before { height: 150px; width: 150px; } -.emj, .emoji, .bigemoji, .emoji-md, .emoji-lg, img[alt^=":"] -{ +.emj, .emoji, .bigemoji, .emoji-md, .emoji-lg, img[alt^=":"] { max-width: 150px !important; max-height: 150px !important; max-width: min(150px,25vw) !important; @@ -5523,6 +5522,9 @@ input[type=radio] ~ .custom-control-label::before { display: inline-block; object-fit: contain; } +.emoji, .emoji-lg, img[data-kind=Classic] { + image-rendering: pixelated; +} span[data-bs-toggle], .pat-preview { position: relative; @@ -7741,3 +7743,75 @@ body { resize: none !important; } } + + +/* ~~~ marseyfx ~~~ */ + +.marseyfx-container { + position: relative; + display: inline-flex; + width: 50px; + height: 50px; + vertical-align: middle; +} + +.marseyfx-container.marseyfx-big { + width: 150px; + height: 150px; +} + +.marseyfx-container * { + position: absolute; + inset: 0; + display: flex; +} + +.marseyfx-image { + width: 100%; + height: 100%; + object-fit: contain; +} + +/* Top aligned images */ + +.marseyfx-image-talking { + bottom: unset; + height: unset; +} + +@keyframes marseyfx-modifier-genocide { + 0% { + transform: scale(1) rotate(0deg) + } + + 100% { + transform: scale(0, 0) rotate(360deg) + } +} + +.marseyfx-modifier-genocide { + display: inline-block; + animation: marseyfx-modifier-genocide 2s infinite; + animation-timing-function: linear; +} + +@keyframes marseyfx-modifier-fallover { + 0% { + transform: rotateX(0deg); + } + + 50%, 100% { + transform: rotateX(80deg); + } +} + +.marseyfx-modifier-fallover-container { + perspective: 500px; + perspective-origin: 200px; +} + +.marseyfx-modifier-fallover { + transform-origin: bottom center; + animation: 3s marseyfx-modifier-fallover infinite + cubic-bezier(0.19, -0.02, 0.77, 0.16); +} \ No newline at end of file diff --git a/files/helpers/config/const.py b/files/helpers/config/const.py index facdf14a3..3cabbcb4f 100644 --- a/files/helpers/config/const.py +++ b/files/helpers/config/const.py @@ -54,7 +54,8 @@ DELETE_EDIT_RATELIMIT = "10/minute;50/day" PUSH_NOTIF_LIMIT = 1000 -IS_LOCALHOST = SITE == "localhost" or SITE == "127.0.0.1" or SITE.startswith("192.168.") or SITE.endswith(".local") +IS_LOCALHOST = SITE.startswith("localhost:") or SITE.startswith("127.0.0.1") or SITE.startswith("192.168.") or SITE.endswith(".local") +print(f"IS_LOCALHOST: {IS_LOCALHOST}") if IS_LOCALHOST: SITE_FULL = 'http://' + SITE @@ -1133,9 +1134,10 @@ engine = create_engine(environ.get("DATABASE_URL").strip(), connect_args={"optio db_session = scoped_session(sessionmaker(bind=engine, autoflush=False)) approved_embed_hosts_for_csp = ' '.join(set([x.split('/')[0] for x in approved_embed_hosts])) -csp = f"default-src 'none'; frame-ancestors 'none'; form-action 'self'; manifest-src 'self'; worker-src 'self'; base-uri 'self'; font-src 'self'; style-src-elem 'self'; style-src-attr 'unsafe-inline'; style-src 'self' 'unsafe-inline'; script-src-elem 'self' challenges.cloudflare.com; script-src-attr 'none'; script-src 'self' challenges.cloudflare.com; frame-src challenges.cloudflare.com www.youtube-nocookie.com platform.twitter.com rumble.com player.twitch.tv; connect-src 'self' videos.watchpeopledie.tv use1.fptls.com use1.fptls3.com api.fpjs.io; img-src {approved_embed_hosts_for_csp} data:; media-src {approved_embed_hosts_for_csp};" +csp = f"default-src 'none'; frame-ancestors 'none'; form-action 'self'; manifest-src 'self'; worker-src 'self'; base-uri 'self'; font-src 'self'; style-src-elem 'self'; style-src-attr 'unsafe-inline'; style-src 'self' 'unsafe-inline'; script-src-elem 'self' challenges.cloudflare.com; script-src-attr 'none'; script-src 'self' challenges.cloudflare.com; frame-src challenges.cloudflare.com www.youtube-nocookie.com platform.twitter.com rumble.com player.twitch.tv; connect-src 'self' videos.watchpeopledie.tv use1.fptls.com use1.fptls3.com api.fpjs.io; img-src 'self' {approved_embed_hosts_for_csp} data:; media-src 'self' {approved_embed_hosts_for_csp};" if not IS_LOCALHOST: csp += ' upgrade-insecure-requests;' + with open("includes/content-security-policy", "w") as f: - f.write(f'add_header Content-Security-Policy "{csp}";') + f.write(f'add_header Content-Security-Policy "{csp}";') \ No newline at end of file diff --git a/files/helpers/marseyfx/modifiers.py b/files/helpers/marseyfx/modifiers.py new file mode 100644 index 000000000..e8d0623de --- /dev/null +++ b/files/helpers/marseyfx/modifiers.py @@ -0,0 +1,123 @@ +import re +from bs4 import BeautifulSoup, Tag +from files.helpers.config.const import SITE_FULL_IMAGES +from files.helpers.marseyfx.tokenizer import StringLiteralToken, Token + +modifier_whitelist = [] + +class Modifier: + name: str + args: list[Token] + + def __init__(self, name: str, args: list[Token]): + self.name = name + self.args = args + +def modifier(fn): + modifier_whitelist.append(fn.__name__) + + def wrapper(*args, **kwargs): + slf = args[0] + slf.el = slf.el.wrap(slf.soup.new_tag('div', attrs={'class': f'marseyfx-modifier marseyfx-modifier-{fn.__name__}'})) + return fn(*args, **kwargs) + return wrapper + +class Modified: + soup: BeautifulSoup + el: Tag #BeautifulSoup element + + def __init__(self, el): + self.soup = BeautifulSoup() + self.el = el + + def add_class(self, class_: str): + self.el.attrs['class'].append(' ' + class_) + + def apply_modifiers(self, modifiers: list[Modifier]): + for modifier in modifiers: + if modifier.name in modifier_whitelist: + getattr(self, modifier.name)(*modifier.args) + + # Using this instead of throwing everything in a string and then parsing it helps + # mitigate the risk of XSS attacks + def image(self, name: str): + image = self.soup.new_tag( + 'img', + loading='lazy', + src=f'{SITE_FULL_IMAGES}/i/{name}.webp', + attrs={'class': f'marseyfx-image marseyfx-image-{name}'} + ) + + container = self.soup.new_tag( + 'div', + attrs={'class': f'marseyfx-image-container marseyfx-image-container-{name}'} + ) + + container.append(image) + return container + + def underlay(self, underlay: Tag): + self.el.insert(0, underlay) + + def overlay(self, overlay: Tag): + self.el.append(overlay) + + @modifier + def pat(self): + self.overlay(self.image('pat')) + + @modifier + def love(self): + self.overlay(self.image('love-foreground')) + self.underlay(self.image('love-background')) + + @modifier + def talking(self): + self.overlay(self.image('talking')) + + @modifier + def genocide(self): + pass + + @modifier + def says(self, msg): + if not isinstance(msg, StringLiteralToken): + return + + self.overlay(self.image('says')) + self.el.append(self.soup.new_tag( + 'span', + string=msg.value, + attrs={'class': 'marseyfx-modifier-says-text'} + )) + + @modifier + def fallover(self): + self.el = self.el.wrap(self.soup.new_tag( + 'div', + attrs={'class': 'marseyfx-modifier-fallover-container'} + )) + + @modifier + def transform(self, transformstyle: StringLiteralToken): + if not re.fullmatch(r'[\w()\s%\.,]*', transformstyle.value): + print(f'Evil transform detected: {transformstyle.value}') + return + + self.el.attrs['style'] = f'transform: {transformstyle.value};' + + + @modifier + def enraged(self): + self.underlay(self.soup.new_tag( + 'div', + attrs={'class': 'marseyfx-enraged-underlay'} + )) + + @modifier + def corrupted(self): + pass + + @modifier + def wavy(self): + self.el.wrap(self.soup.new_tag('svg')) \ No newline at end of file diff --git a/files/helpers/marseyfx/parser.py b/files/helpers/marseyfx/parser.py new file mode 100644 index 000000000..b567fb44c --- /dev/null +++ b/files/helpers/marseyfx/parser.py @@ -0,0 +1,107 @@ +from tokenize import Token + +from bs4 import BeautifulSoup +from files.helpers.config.const import SITE_FULL_IMAGES +from files.helpers.marseyfx.tokenizer import ArgsToken, DotToken, GroupToken, Tokenizer, WordToken +from files.helpers.marseyfx.modifiers import Modified, Modifier + +emoji_replacers = { + '!': 'is_flipped', + '#': 'is_big', + '@': 'is_user' +} + +class Emoji: + name: str + token: Token + is_big = False + is_flipped = False + is_user = False + modifiers: list[Modifier] + + def __init__(self, name: str, modifiers, token: Token): + for symbol, value in emoji_replacers.items(): + if symbol in name: + name = name.replace(symbol, '') + setattr(self, value, True) + + self.name = name + self.modifiers = modifiers + self.token = token + + def create_el(self): + soup = BeautifulSoup() + + el = soup.new_tag( + 'img', + loading='lazy', + src=f'{SITE_FULL_IMAGES}/e/{self.name}.webp', + attrs={'class': f'marseyfx-emoji marseyfx-image'} + ) + soup.append(el) + el = el.wrap( + soup.new_tag('div', attrs={'class': 'marseyfx-emoji-container'}) + ) + + mod = Modified(el) + mod.apply_modifiers(self.modifiers) + + container = soup.new_tag('div', attrs={'class': 'marseyfx-container'}) + if (self.is_big): + container['class'].append(' marseyfx-big') + + if (self.is_flipped): + container['class'].append(' marseyfx-flipped') + + return mod.el.wrap(container) + +def parse_emoji(str: str): + tokenizer = Tokenizer(str) + token = tokenizer.parse_next_tokens() + + if len(tokenizer.errors) > 0 or token is None: + return False, None, token + + emoji = parse_from_token(tokenizer, token) + print(f'Here! {emoji}') + + if not emoji: + return False, None, token + + return True, emoji, token + +def parse_from_token(tokenizer: Tokenizer, token: GroupToken): + if not isinstance(token, GroupToken): + tokenizer.error('Malformed token -- Expected a group token') + return + + emoji = token.children[0] + + if not isinstance(emoji, WordToken): + tokenizer.error('Malformed token -- Expected an emoji (word token)') + return + + modifiers = [] + + i = 1 + while i + 1 < len(token.children): + t = token.children[i] + + if not isinstance(t, DotToken): + tokenizer.error('Malformed token -- Expected a dot') + return + + modifier = token.children[i + 1] + if not isinstance(modifier, WordToken): + tokenizer.error('Malformed token -- Expected a modifier name (word token)') + return + + if not i + 2 < len(token.children) or not isinstance(token.children[i + 2], ArgsToken): + modifiers.append(Modifier(modifier.value, [])) + i += 2 + else: + args = token.children[i + 2] + modifiers.append(Modifier(modifier.value, args.children)) + i += 3 + + return Emoji(emoji.value, modifiers, token) \ No newline at end of file diff --git a/files/helpers/marseyfx/tokenizer.py b/files/helpers/marseyfx/tokenizer.py new file mode 100644 index 000000000..82e859b25 --- /dev/null +++ b/files/helpers/marseyfx/tokenizer.py @@ -0,0 +1,210 @@ +from abc import abstractmethod +import re + +class TokenizerError: + index: int + error: str + + def __init__(self, index: int, error: str): + self.index = index + self.error = error + +class Tokenizer: + str: str + index: int + errors: list[TokenizerError] + + def __init__(self, str: str): + self.str = str + self.index = 0 + self.errors = [] + + def has_next(self): + return self.index < len(self.str) + + def peek(self): + return self.str[self.index] + + def eat(self): + c = self.peek() + self.index += 1 + return c + + def barf(self): + self.index -= 1 + + def error(self, error: str): + self.errors.append(TokenizerError(self.index, error)) + + def token_to_string(self, token): + return self.str[token.span[0]:token.span[1]] + + def parse_next_tokens(self): + print(self.str[self.index:]) + start = self.index + tokens = [] + while self.has_next(): + if WordToken.can_parse(self): + tokens.append(WordToken.parse(self)) + elif DotToken.can_parse(self): + tokens.append(DotToken.parse(self)) + elif ArgsToken.can_parse(self): + tokens.append(ArgsToken.parse(self)) + elif StringLiteralToken.can_parse(self): + tokens.append(StringLiteralToken.parse(self)) + else: + break + + if len(tokens) == 0: + self.error('Expected a token') + return None + + if len(tokens) == 1: + return tokens[0] + + return GroupToken((start, self.index), tokens) + +class Token: + span: tuple[int, int] + + @staticmethod + @abstractmethod + def can_parse(tokenizer: Tokenizer) -> bool: + pass + + @staticmethod + @abstractmethod + def parse(tokenizer: Tokenizer): + pass + +class WordToken(Token): + value: str + + def __init__(self, span: tuple[int, int], value: str): + self.value = value + self.span = span + + @staticmethod + def can_parse(tokenizer: Tokenizer): + return re.fullmatch(r'[!#\w@]', tokenizer.peek()) + + @staticmethod + def parse(tokenizer: Tokenizer): + start = tokenizer.index + value = '' + while tokenizer.has_next(): + if WordToken.can_parse(tokenizer): + value += tokenizer.eat() + else: + break + + return WordToken((start, tokenizer.index), value) + +class StringLiteralToken(Token): + value: str + + def __init__(self, span: tuple[int, int], value: str): + self.value = value + self.span = span + + @staticmethod + def can_parse(tokenizer: Tokenizer): + return tokenizer.peek() == '"' + + # i was cuddling with my fwb while writing this ;3 + @staticmethod + def parse(tokenizer: Tokenizer): + start = tokenizer.index + tokenizer.eat() + value = '' + next_escaped = False + while tokenizer.has_next(): + if tokenizer.peek() == '"' and not next_escaped: + tokenizer.eat() + break + elif tokenizer.peek() == '\\' and not next_escaped: + next_escaped = True + tokenizer.eat() + else: + value += tokenizer.eat() + next_escaped = False + + return StringLiteralToken((start, tokenizer.index), value) + +class NumberLiteralToken(Token): + value: float + + def __init__(self, span: tuple[int, int], value: float): + self.value = value + self.span = span + + @staticmethod + def can_parse(tokenizer: Tokenizer): + return re.fullmatch(r'[-\d\.]', tokenizer.peek()) + + @staticmethod + def parse(tokenizer: Tokenizer): + start = tokenizer.index + value = '' + while tokenizer.has_next(): + if NumberLiteralToken.can_parse(tokenizer): + value += tokenizer.eat() + else: + break + + try: + value = float(value) + except ValueError: + tokenizer.error('Invalid number literal') + value = 0.0 + + return NumberLiteralToken((start, tokenizer.index), value) + + def get_float(self): + return float(self.value) + +class DotToken(Token): + def __init__(self, span: tuple[int, int]): + self.span = span + + @staticmethod + def can_parse(tokenizer: Tokenizer): + return tokenizer.peek() == '.' + + @staticmethod + def parse(tokenizer: Tokenizer): + tokenizer.eat() + return DotToken((tokenizer.index, tokenizer.index + 1)) + +class GroupToken(Token): + children: list[Token] + + def __init__(self, span: tuple[int, int], children: list[Token]): + self.children = children + self.span = span + +class ArgsToken(Token): + children: list[GroupToken] + def __init__(self, span: tuple[int, int], children: list[Token]): + self.children = children + self.span = span + + @staticmethod + def can_parse(tokenizer: Tokenizer): + return tokenizer.peek() == '(' + + @staticmethod + def parse(tokenizer: Tokenizer): + start = tokenizer.index + tokens = [] + while tokenizer.has_next(): + if tokenizer.peek() == ')': + tokenizer.eat() + break + elif tokenizer.peek() == ',': + tokenizer.eat() + else: + tokenizer.eat() + tokens.append(tokenizer.parse_next_tokens()) + + return ArgsToken((start, tokenizer.index), tokens) \ No newline at end of file diff --git a/files/helpers/regex.py b/files/helpers/regex.py index 857de03fc..4c4231032 100644 --- a/files/helpers/regex.py +++ b/files/helpers/regex.py @@ -52,6 +52,8 @@ mute_regex = re.compile("\/mute @?([\w-]{1,30}) ([0-9]+)", flags=re.A|re.I) emoji_regex = re.compile(f"

\s*(:[!#@\w\-]{{1,72}}:\s*)+<\/p>", flags=re.A) emoji_regex2 = re.compile(f'(?|[^`]*`))', flags=re.A) +marseyfx_emoji_regex = re.compile(':[^\s].{0,98}?[^\\\\]:', flags=re.A) + snappy_url_regex = re.compile('(.+?)<\/a>', flags=re.A) email_regex = re.compile('[A-Za-z0-9._%+-]{1,64}@[A-Za-z0-9.-]{2,63}\.[A-Za-z]{2,63}', flags=re.A) @@ -126,7 +128,7 @@ twitch_regex = re.compile('(https:\/\/)?(www\.)?twitch.tv\/(.*)', flags=re.I|re. link_fix_regex = re.compile("(\[.*?\]\()(?!http|\/)(.*?\))" + NOT_IN_CODE_OR_LINKS, flags=re.A) -css_url_regex = re.compile('url\([\'"]?((.|\n)*?)[",);}$]', flags=re.I|re.A) # AEVANN, DO NOT TOUCH THIS, IT WENT THROUGH A MILLION ITERATIONS, IT'S PERFECT NOW +css_url_regex = re.compile('url\([\'"]?((.|\n)*?)[);}$]', flags=re.I|re.A) # AEVANN, DO NOT TOUCH THIS, IT WENT THROUGH A MILLION ITERATIONS, IT'S PERFECT NOW linefeeds_regex = re.compile("([^\n])\n([^\n])", flags=re.A) diff --git a/files/helpers/sanitize.py b/files/helpers/sanitize.py index c8c6922e2..8acb9a882 100644 --- a/files/helpers/sanitize.py +++ b/files/helpers/sanitize.py @@ -4,8 +4,10 @@ import re import signal from functools import partial from os import path, listdir +from typing_extensions import deprecated from urllib.parse import parse_qs, urlparse, unquote, ParseResult, urlencode, urlunparse import time +from files.helpers.marseyfx.parser import parse_emoji from sqlalchemy.sql import func @@ -53,7 +55,11 @@ TLDS = ( # Original gTLDs and ccTLDs allowed_tags = ('a','audio','b','big','blockquote','br','center','code','del','details','em','g','h1','h2','h3','h4','h5','h6','hr','i','img','li','lite-youtube','marquee','ol','p','pre','rp','rt','ruby','small','span','spoiler','strike','strong','sub','summary','sup','table','tbody','td','th','thead','tr','u','ul','video') -allowed_styles = ['background-color', 'color', 'filter', 'font-weight', 'text-align'] +allowed_global_styles = ['background-color', 'color', 'filter', 'font-weight', 'text-align'] + +additional_img_styles = ['transform'] + +allowed_styles = allowed_global_styles + additional_img_styles def allowed_attributes(tag, name, value): @@ -80,6 +86,7 @@ def allowed_attributes(tag, name, value): if name in {'alt','title'}: return True if name == 'class' and value == 'img': return True if name == 'data-user-submitted' and not value: return True + if name == 'data-kind' and value in EMOJI_KINDS: return True if tag == 'lite-youtube': if name == 'params' and value.startswith('autoplay=1&modestbranding=1'): return True @@ -261,8 +268,21 @@ def find_all_emote_endings(word): return endings, word +def render_emojis(markup: str): + emojis_used = set() -def render_emoji(html, regexp, golden, emojis_used, b=False, is_title=False): + for emoji_match in marseyfx_emoji_regex.finditer(markup): + emoji_str = emoji_match.group()[1:-1] # Cut off colons + success, emoji, _ = parse_emoji(emoji_str) + if success: + emojis_used.add(emoji.name) + emoji_html = str(emoji.create_el()) + markup = markup.replace(emoji_match.group(), emoji_html) + + return markup, emojis_used + +@deprecated("Use the new one") +def old_render_emoji(html, regexp, golden, emojis_used, b=False, is_title=False): emojis = list(regexp.finditer(html)) captured = set() @@ -323,6 +343,10 @@ def render_emoji(html, regexp, golden, emojis_used, b=False, is_title=False): if(is_loved): modifier_html = f'{modifier_html}{loved_html}' + kind = g.db.query(Emoji.kind).filter(Emoji.name==emoji).one_or_none() + if kind: + attrs += ' data-kind="' + kind[0].replace('"', '') + '"' + if (is_patted and emoji != 'marseyunpettable') or is_talking or is_genocided or is_loved: if path.isfile(f"files/assets/images/emojis/{emoji}.webp"): emoji_html = f'{modifier_html}{emoji_partial_pat.format(old, f"{SITE_FULL_IMAGES}/e/{emoji}.webp", attrs)}' @@ -529,30 +553,6 @@ def sanitize(sanitized, golden=True, limit_pings=0, showmore=False, count_emojis sanitized = spoiler_regex.sub(r'\1', sanitized) - emojis_used = set() - - emojis = list(emoji_regex.finditer(sanitized)) - if len(emojis) > 20: golden = False - - captured = [] - for i in emojis: - if i.group(0) in captured: continue - captured.append(i.group(0)) - - old = i.group(0) - if 'marseylong1' in old or 'marseylong2' in old or 'marseylongcockandballs' in old or 'marseyllama1' in old or 'marseyllama2' in old: - new = old.lower().replace(">", " class='mb-0'>") - else: new = old.lower() - - new = render_emoji(new, emoji_regex2, golden, emojis_used, True) - - sanitized = sanitized.replace(old, new) - - emojis = list(emoji_regex2.finditer(sanitized)) - if len(emojis) > 20: golden = False - - sanitized = render_emoji(sanitized, emoji_regex2, golden, emojis_used) - sanitized = sanitized.replace('&','&') sanitized = video_sub_regex.sub(r'

', sanitized) @@ -578,9 +578,26 @@ def sanitize(sanitized, golden=True, limit_pings=0, showmore=False, count_emojis parse_email=False, url_re=url_re)] ).clean(sanitized) + sanitized, emojis_used = render_emojis(sanitized) + #doing this here cuz of the linkifyfilter right above it (therefore unifying all link processing logic) soup = BeautifulSoup(sanitized, 'lxml') + # style validation + """styled_elements = soup.find_all(style=True) + for element in styled_elements: + # Images have all allowed styles, so we dont need to check these + if element.name == 'img': + # We will wrap the images in a div so that they cannot leave the container + element.wrap(soup.new_tag('div', **{'class': 'transformed-img'})) + continue + + style = element['style'] + matches = css_style_attr_regex.findall(style) + for match in matches: + if match[0] not in allowed_global_styles: + error(f"Invalid style property: {match[0]}")""" + links = soup.find_all("a") if g.v and g.v.admin_level >= PERMS["IGNORE_DOMAIN_BAN"]: @@ -705,9 +722,7 @@ def filter_emojis_only(title, golden=True, count_emojis=False): title = remove_cuniform(title) - emojis_used = set() - - title = render_emoji(title, emoji_regex2, golden, emojis_used, is_title=True) + title, emojis_used = render_emojis(title) #old_render_emoji(title, emoji_regex2, golden, emojis_used, is_title=True) if count_emojis: for emoji in g.db.query(Emoji).filter(Emoji.submitter_id==None, Emoji.name.in_(emojis_used)):