diff --git a/.gitignore b/.gitignore index 97a56b27c..4cdeeafe9 100644 --- a/.gitignore +++ b/.gitignore @@ -7,3 +7,7 @@ __pycache__/ emojis.zip emojis_original.zip includes/content-security-policy +includes/headers +nginx.conf +.gitignore +docker-compose.yml \ No newline at end of file diff --git a/files/assets/css/main.css b/files/assets/css/main.css index 97696468e..c795b559b 100644 --- a/files/assets/css/main.css +++ b/files/assets/css/main.css @@ -5517,8 +5517,7 @@ input[type=radio] ~ .custom-control-label::before { height: 150px; width: 150px; } -.emj, .emoji, .bigemoji, .emoji-md, .emoji-lg, img[alt^=":"] -{ +.emj, .emoji, .bigemoji, .emoji-md, .emoji-lg, img[alt^=":"] { max-width: 150px !important; max-height: 150px !important; max-width: min(150px,25vw) !important; @@ -5526,6 +5525,9 @@ input[type=radio] ~ .custom-control-label::before { display: inline-block; object-fit: contain; } +.emoji, .emoji-lg, img[data-kind=Classic] { + image-rendering: pixelated; +} span[data-bs-toggle], .pat-preview { position: relative; diff --git a/files/assets/images/prohibition.svg b/files/assets/images/prohibition.svg new file mode 100644 index 000000000..a9e1c44af --- /dev/null +++ b/files/assets/images/prohibition.svg @@ -0,0 +1,57 @@ + + + + + + + + diff --git a/files/assets/images/scope.svg b/files/assets/images/scope.svg new file mode 100644 index 000000000..35636b9f0 --- /dev/null +++ b/files/assets/images/scope.svg @@ -0,0 +1,347 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/files/helpers/config/const.py b/files/helpers/config/const.py index 83f7b7edb..48ec9caf0 100644 --- a/files/helpers/config/const.py +++ b/files/helpers/config/const.py @@ -54,7 +54,8 @@ DELETE_EDIT_RATELIMIT = "10/minute;50/day" PUSH_NOTIF_LIMIT = 1000 -IS_LOCALHOST = SITE == "localhost" or SITE == "127.0.0.1" or SITE.startswith("192.168.") or SITE.endswith(".local") +IS_LOCALHOST = 
SITE.startswith("localhost:") or SITE.startswith("127.0.0.1") or SITE.startswith("192.168.") or SITE.endswith(".local") +print(f"IS_LOCALHOST: {IS_LOCALHOST}") if IS_LOCALHOST: SITE_FULL = 'http://' + SITE @@ -1073,9 +1074,10 @@ engine = create_engine(environ.get("DATABASE_URL").strip(), connect_args={"optio db_session = scoped_session(sessionmaker(bind=engine, autoflush=False)) approved_embed_hosts_for_csp = ' '.join(set([x.split('/')[0] for x in approved_embed_hosts])) -csp = f"default-src 'none'; frame-ancestors 'none'; form-action 'self'; manifest-src 'self'; worker-src 'self'; base-uri 'self'; font-src 'self'; style-src-elem 'self'; style-src-attr 'unsafe-inline'; style-src 'self' 'unsafe-inline'; script-src-elem 'self' challenges.cloudflare.com; script-src-attr 'none'; script-src 'self' challenges.cloudflare.com; frame-src challenges.cloudflare.com www.youtube-nocookie.com platform.twitter.com rumble.com player.twitch.tv; connect-src 'self' videos.watchpeopledie.tv use1.fptls.com use1.fptls3.com api.fpjs.io; img-src {approved_embed_hosts_for_csp} data:; media-src {approved_embed_hosts_for_csp};" +csp = f"default-src 'none'; frame-ancestors 'none'; form-action 'self'; manifest-src 'self'; worker-src 'self'; base-uri 'self'; font-src 'self'; style-src-elem 'self'; style-src-attr 'unsafe-inline'; style-src 'self' 'unsafe-inline'; script-src-elem 'self' challenges.cloudflare.com; script-src-attr 'none'; script-src 'self' challenges.cloudflare.com; frame-src challenges.cloudflare.com www.youtube-nocookie.com platform.twitter.com rumble.com player.twitch.tv; connect-src 'self' videos.watchpeopledie.tv use1.fptls.com use1.fptls3.com api.fpjs.io; img-src 'self' {approved_embed_hosts_for_csp} data:; media-src 'self' {approved_embed_hosts_for_csp};" if not IS_LOCALHOST: csp += ' upgrade-insecure-requests;' + with open("includes/content-security-policy", "w") as f: - f.write(f'add_header Content-Security-Policy "{csp}";') + f.write(f'add_header Content-Security-Policy 
"{csp}";') \ No newline at end of file diff --git a/files/helpers/marseyfx/modifiers.py b/files/helpers/marseyfx/modifiers.py new file mode 100644 index 000000000..679b86ef0 --- /dev/null +++ b/files/helpers/marseyfx/modifiers.py @@ -0,0 +1,315 @@ +import copy +import re +from typing import Optional +from bs4 import BeautifulSoup, Tag +from files.helpers.config.const import SITE_FULL_IMAGES +from files.helpers.marseyfx.tokenizer import GroupToken, NumberLiteralToken, StringLiteralToken, Token, Tokenizer +import files.helpers.marseyfx.parser as parser + +modifier_whitelist = [] + +class Modifier: + name: str + args: list[Token] + + def __init__(self, name: str, args: list[Token]): + self.name = name + self.args = args + +def modifier(fn): + modifier_whitelist.append(fn.__name__) + + def wrapper(*args, **kwargs): + slf = args[0] + ctx = ModifierContextFrame(fn.__name__) + slf.context_frames.insert(0, ctx) + slf.child = slf.container + slf.container = slf.child.wrap(slf.soup.new_tag('div', attrs={'class': f'marseyfx-modifier marseyfx-modifier-{ctx.name}'})) + slf.add_child_class(f'marseyfx-modifier-{ctx.name}-self') + res = fn(*args, **kwargs) + slf.context_frames.pop(0) + return res + return wrapper + +def heavy(fn): + def wrapper(*args, **kwargs): + slf = args[0] + slf.heavy_count += 1 + return fn(*args, **kwargs) + return wrapper + +class ModifierContextFrame: + name: str + def __init__(self, name: str): + self.name = name + +class Modified: + soup: BeautifulSoup + container: Tag + child: Tag + tokenizer: Tokenizer + heavy_count = 0 + context_frames: list[ModifierContextFrame] + + def __init__(self, el, tokenizer): + self.soup = BeautifulSoup() + self.container = el + self.tokenizer = tokenizer + self.context_frames = [] + + def ctx(self): + return self.context_frames[0] if len(self.context_frames) > 0 else None + + def add_class(self, class_: str): + if not 'class' in self.container.attrs: + self.container.attrs['class'] = '' + else: + 
self.container.attrs['class'].append(' ' + class_) + + def add_child_class(self, class_: str): + if not 'class' in self.child.attrs: + self.child.attrs['class'] = '' + else: + self.child.attrs['class'].append(' ' + class_) + + def apply_modifiers(self, modifiers: list[Modifier]): + for modifier in modifiers: + if modifier.name in modifier_whitelist: + getattr(self, modifier.name)(*map(GroupToken.unwrap, modifier.args)) + + # Using this instead of throwing everything in a string and then parsing it helps + # mitigate the risk of XSS attacks + def image(self, name: str): + + filename = name + + if not '.' in filename: + filename += '.webp' + + image = self.soup.new_tag( + 'img', + loading='lazy', + src=f'{SITE_FULL_IMAGES}/i/{filename}', + attrs={'class': f'marseyfx-image marseyfx-image-{name}'} + ) + + container = self.soup.new_tag( + 'div', + attrs={'class': f'marseyfx-image-container marseyfx-image-container-{name}'} + ) + + container.append(image) + return container + + def underlay(self, underlay: Tag): + self.container.insert(0, underlay) + + def overlay(self, overlay: Tag): + self.container.append(overlay) + + def add_style(self, style: str): + if 'style' in self.container.attrs: + style = self.container.attrs['style'] + style + + self.container.attrs['style'] = style + + def meme_text(self, text: str, class_: Optional[str] = None): + attrs = {} + if class_ is not None: + attrs = {'class': f'marseyfx-memetext-{class_}'} + + tag = self.soup.new_tag( + 'span', + attrs=attrs + ) + + tag.string = text + + self.overlay(tag) + + def create_other(self, other: GroupToken = None): + wrapper = self.soup.new_tag('div', attrs={'class': f'marseyfx-modifier-{self.ctx().name}-other'}) + + if other is None: + return wrapper + + other = other.wrap() + other_emoji = parser.parse_from_token(self.tokenizer, other) + + if other_emoji is None: + return wrapper + + other_emoji.is_primary = False + + return other_emoji.create_el(self.tokenizer).wrap(wrapper) + + @modifier + def 
pat(self): + self.overlay(self.image('hand')) + + @modifier + def love(self): + self.overlay(self.image('love-foreground')) + self.underlay(self.image('love-background')) + + @modifier + def talking(self): + self.overlay(self.image('talking')) + + @modifier + def genocide(self): + pass + + @modifier + def party(self): + pass + + @modifier + def says(self, msg): + if not isinstance(msg, StringLiteralToken): + return + + container = self.soup.new_tag( + 'div', + attrs={'class': 'marseyfx-modifier-says-container'} + ) + self.container.append(container) + + container.append(self.soup.new_tag( + 'div', + attrs={'class': 'marseyfx-modifier-says-nub'} + )) + + tag = self.soup.new_tag( + 'span', + attrs={'class': 'marseyfx-modifier-says-text'} + ) + tag.string = msg.value + container.append(tag) + + @modifier + def fallover(self): + self.container = self.container.wrap(self.soup.new_tag( + 'div', + attrs={'class': 'marseyfx-modifier-fallover-container'} + )) + + @modifier + def transform(self, transformstyle: StringLiteralToken): + if not re.fullmatch(r'[\w()\s%\.,]*', transformstyle.value): + print(f'Evil transform detected: {transformstyle.value}') + return + + self.add_style(f'transform: {transformstyle.value};') + + @heavy + @modifier + def enraged(self): + self.underlay(self.soup.new_tag( + 'div', + attrs={'class': 'marseyfx-modifier-enraged-underlay'} + )) + + @modifier + def meme(self, toptext: Optional[StringLiteralToken] = None, bottomtext: Optional[StringLiteralToken] = None): + if isinstance(toptext, StringLiteralToken): + self.meme_text(toptext.value, 'toptext') + + if isinstance(bottomtext, StringLiteralToken): + self.meme_text(bottomtext.value, 'bottomtext') + + def bottomtext(self, text: StringLiteralToken): + if not isinstance(text, StringLiteralToken): + return + + tag = self.soup.new_tag( + 'span', + attrs={'class': 'marseyfx-modifier-bottomtext-text'} + ) + + tag.string = text.value + + self.overlay(tag) + + @modifier + def spin(self, speed=None): + if 
not isinstance(speed, NumberLiteralToken): + return + + self.add_style(f'animation-duration: {1/speed.value}s;') + + @modifier + def triumphs(self, other: GroupToken): + other = other.wrap() + other_emoji = parser.parse_from_token(self.tokenizer, other) + print(f'Other emoji: {other_emoji} / Token: {other}') + + if other_emoji is None: + return + + self.add_child_class('marseyfx-modifier-triumphs-self') + + other_emoji.is_primary = False + + other = other_emoji.create_el(self.tokenizer).wrap( + self.soup.new_tag('div', attrs={'class': 'marseyfx-modifier-triumphs-other'}) + ) + self.underlay(other) + + @modifier + def nested(self, inside: GroupToken): + inside = inside.wrap() + inside_emoji = parser.parse_from_token(self.tokenizer, inside) + + if inside_emoji is None: + return + + inside_emoji.is_primary = False + + inside = inside_emoji.create_el(self.tokenizer).wrap( + self.soup.new_tag('div', attrs={'class': 'marseyfx-modifier-nested-other'}) + ) + + self.underlay(inside) + + self.add_child_class('marseyfx-modifier-nested-side') + child = self.child + self.child = child.wrap(self.soup.new_tag('div', attrs={'class': 'marseyfx-modifier-nested-outer-container'})) + other_side = copy.copy(child) + self.child.append(other_side) + + @modifier + def morph(self, other: GroupToken): + self.add_child_class('marseyfx-modifier-morph-self') + + other = other.wrap() + other_emoji = parser.parse_from_token(self.tokenizer, other) + + if other_emoji is None: + return + + other_emoji.is_primary = False + other = other_emoji.create_el(self.tokenizer).wrap( + self.soup.new_tag('div', attrs={'class': 'marseyfx-modifier-morph-other'}) + ) + + self.container.append(other) + + @heavy + @modifier + def bulge(self, strength: NumberLiteralToken = None): + self.child = self.child.wrap(self.soup.new_tag('svg', attrs={'class': 'marseyfx-modifier-bulge-container'})) + + @modifier + def prohibition(self): + self.overlay(self.image('prohibition.svg')) + + @modifier + def snipe(self): + 
self.overlay(self.image('scope.svg')) + self.add_child_class('marseyfx-modifier-snipe-target') + + @modifier + def fucks(self, other: GroupToken): + other = self.create_other(other) + self.container.append(other) \ No newline at end of file diff --git a/files/helpers/marseyfx/parser.py b/files/helpers/marseyfx/parser.py new file mode 100644 index 000000000..bfd7c0530 --- /dev/null +++ b/files/helpers/marseyfx/parser.py @@ -0,0 +1,128 @@ +from tokenize import Token + +from bs4 import BeautifulSoup +from files.helpers.config.const import SITE_FULL_IMAGES +from files.helpers.marseyfx.tokenizer import ArgsToken, DotToken, GroupToken, NumberLiteralToken, Tokenizer, WordToken +from files.helpers.marseyfx.modifiers import Modified, Modifier, modifier_whitelist + +emoji_replacers = { + '!': 'is_flipped', + '#': 'is_big', + '@': 'is_user' +} + +class Emoji: + name: str + token: Token + is_big = False + is_flipped = False + is_user = False + modifiers: list[Modifier] + is_primary = True + + def __init__(self, name: str, modifiers, token: Token, **args): + for symbol, value in emoji_replacers.items(): + if symbol in name: + name = name.replace(symbol, '') + setattr(self, value, True) + + self.name = name + self.modifiers = modifiers + self.token = token + self.is_primary = args.get('is_primary', True) + + def create_el(self, tokenizer: Tokenizer): + soup = BeautifulSoup() + + el = soup.new_tag( + 'img', + loading='lazy', + src=f'{SITE_FULL_IMAGES}/e/{self.name}.webp', + attrs={ + 'class': f'marseyfx-emoji marseyfx-image', + } + ) + soup.append(el) + el = el.wrap( + soup.new_tag('div', attrs={ + 'class': 'marseyfx-emoji-container' + }) + ) + + mod = Modified(el, tokenizer) + mod.apply_modifiers(self.modifiers) + + + container_attrs = { + 'class': 'marseyfx-container', + } + + if self.is_primary: + container_attrs |= { + 'data-bs-toggle': 'tooltip', + 'title': tokenizer.str + } + + container = soup.new_tag('div', attrs=container_attrs) + + if (self.is_big): + 
container['class'].append(' marseyfx-big') + + if (self.is_flipped): + container['class'].append(' marseyfx-flipped') + + return mod.container.wrap(container) + +def parse_emoji(tokenizer: Tokenizer): + token = tokenizer.parse_next_tokens() + + if len(tokenizer.errors) > 0 or token is None: + return False, None, token + + emoji = parse_from_token(tokenizer, token) + print(f'Here! {emoji}') + + if not emoji: + return False, None, token + + return True, emoji, token + +def parse_from_token(tokenizer: Tokenizer, token: GroupToken): + if not isinstance(token, GroupToken): + tokenizer.error('Malformed token -- Expected a group token') + return + + emoji = token.children[0] + + if not isinstance(emoji, WordToken) and not isinstance(emoji, NumberLiteralToken): + tokenizer.error('Malformed token -- Expected an emoji (word token) or number literal token') + return + + modifiers = [] + + i = 1 + while i + 1 < len(token.children): + t = token.children[i] + + if not isinstance(t, DotToken): + tokenizer.error('Malformed token -- Expected a dot') + return + + modifier = token.children[i + 1] + if not isinstance(modifier, WordToken): + tokenizer.error('Malformed token -- Expected a modifier name (word token)') + return + + if not modifier.value in modifier_whitelist: + tokenizer.error(f'Unknown modifier: {modifier.value}') + return + + if not i + 2 < len(token.children) or not isinstance(token.children[i + 2], ArgsToken): + modifiers.append(Modifier(modifier.value, [])) + i += 2 + else: + args = token.children[i + 2] + modifiers.append(Modifier(modifier.value, args.children)) + i += 3 + + return Emoji(tokenizer.str[emoji.span[0]:emoji.span[1]], modifiers, token) \ No newline at end of file diff --git a/files/helpers/marseyfx/tokenizer.py b/files/helpers/marseyfx/tokenizer.py new file mode 100644 index 000000000..7d44b84bc --- /dev/null +++ b/files/helpers/marseyfx/tokenizer.py @@ -0,0 +1,236 @@ +from abc import abstractmethod +import re + +class TokenizerError: + index: int + 
error: str + + def __init__(self, tokenizer, index: int, error: str): + self.tokenizer = tokenizer + self.index = index + self.error = error + + def __str__(self): + return f'{self.error}\n {self.tokenizer.str}\n {" " * self.index}^' + +class Tokenizer: + str: str + index: int + errors: list[TokenizerError] + + def __init__(self, str: str): + self.str = str + self.index = 0 + self.errors = [] + + def has_next(self): + return self.index < len(self.str) + + def peek(self): + if not self.has_next(): + self.error('Unexpected end of input') + return None + return self.str[self.index] + + def eat(self): + c = self.peek() + self.index += 1 + return c + + def barf(self): + self.index -= 1 + + def error(self, error: str): + self.errors.append(TokenizerError(self, self.index, error)) + + def token_to_string(self, token): + return self.str[token.span[0]:token.span[1]] + + def parse_next_tokens(self): + start = self.index + tokens = [] + while self.has_next(): + if self.peek() == ' ': + self.eat() + elif NumberLiteralToken.can_parse(self): + tokens.append(NumberLiteralToken.parse(self)) + elif WordToken.can_parse(self): + tokens.append(WordToken.parse(self)) + elif DotToken.can_parse(self): + tokens.append(DotToken.parse(self)) + elif ArgsToken.can_parse(self): + tokens.append(ArgsToken.parse(self)) + elif StringLiteralToken.can_parse(self): + tokens.append(StringLiteralToken.parse(self)) + else: + break + + if len(tokens) == 0: + self.error('Expected a token') + return None + + return GroupToken((start, self.index), tokens) + +class Token: + span: tuple[int, int] + + def wrap(self): + if isinstance(self, GroupToken): + return self + else: + return GroupToken(self.span, [self]) + + @staticmethod + @abstractmethod + def can_parse(tokenizer: Tokenizer) -> bool: + pass + + @staticmethod + @abstractmethod + def parse(tokenizer: Tokenizer): + pass + +class WordToken(Token): + value: str + + def __init__(self, span: tuple[int, int], value: str): + self.value = value + self.span = 
span + + @staticmethod + def can_parse(tokenizer: Tokenizer): + return re.fullmatch(r'[!#@a-zA-Z]', tokenizer.peek()) + + @staticmethod + def parse(tokenizer: Tokenizer): + start = tokenizer.index + value = '' + while tokenizer.has_next(): + if WordToken.can_parse(tokenizer): + value += tokenizer.eat() + else: + break + + return WordToken((start, tokenizer.index), value) + +class StringLiteralToken(Token): + value: str + + def __init__(self, span: tuple[int, int], value: str): + self.value = value + self.span = span + + @staticmethod + def can_parse(tokenizer: Tokenizer): + return tokenizer.peek() == '"' + + # i was cuddling with my fwb while writing this ;3 + @staticmethod + def parse(tokenizer: Tokenizer): + start = tokenizer.index + tokenizer.eat() + value = '' + next_escaped = False + while tokenizer.has_next(): + if tokenizer.peek() == '"' and not next_escaped: + tokenizer.eat() + break + elif tokenizer.peek() == '\\' and not next_escaped: + next_escaped = True + tokenizer.eat() + else: + value += tokenizer.eat() + next_escaped = False + + return StringLiteralToken((start, tokenizer.index), value) + +class NumberLiteralToken(Token): + value: float + + def __init__(self, span: tuple[int, int], value: float): + self.value = value + self.span = span + + @staticmethod + def can_parse(tokenizer: Tokenizer): + return re.fullmatch(r'[-\d]', tokenizer.peek()) + + @staticmethod + def can_parse_next(tokenizer: Tokenizer): + return re.fullmatch(r'[-\d\.]', tokenizer.peek()) + + @staticmethod + def parse(tokenizer: Tokenizer): + start = tokenizer.index + value = '' + while tokenizer.has_next(): + if NumberLiteralToken.can_parse_next(tokenizer): + value += tokenizer.eat() + else: + break + + try: + value = float(value) + except ValueError: + tokenizer.error('Invalid number literal') + value = 0.0 + + return NumberLiteralToken((start, tokenizer.index), value) + + def get_float(self): + return float(self.value) + +class DotToken(Token): + def __init__(self, span: tuple[int, 
int]): + self.span = span + + @staticmethod + def can_parse(tokenizer: Tokenizer): + return tokenizer.peek() == '.' + + @staticmethod + def parse(tokenizer: Tokenizer): + tokenizer.eat() + return DotToken((tokenizer.index, tokenizer.index + 1)) + +class GroupToken(Token): + children: list[Token] + + def __init__(self, span: tuple[int, int], children: list[Token]): + self.children = children + + # this span is probably wrong tbh but idc + self.span = span + + def unwrap(self): + if len(self.children) == 1: + return self.children[0] + else: + return self + +class ArgsToken(Token): + children: list[GroupToken] + def __init__(self, span: tuple[int, int], children: list[Token]): + self.children = children + self.span = span + + @staticmethod + def can_parse(tokenizer: Tokenizer): + return tokenizer.peek() == '(' + + @staticmethod + def parse(tokenizer: Tokenizer): + start = tokenizer.index + tokens = [] + while tokenizer.has_next(): + if tokenizer.peek() == '(': + tokenizer.eat() + if tokenizer.peek() == ')': + tokenizer.eat() + break + elif tokenizer.peek() == ',': + tokenizer.eat() + else: + tokens.append(tokenizer.parse_next_tokens()) + + return ArgsToken((start, tokenizer.index), tokens) \ No newline at end of file diff --git a/files/helpers/regex.py b/files/helpers/regex.py index 35dbc4994..e71e4e9bf 100644 --- a/files/helpers/regex.py +++ b/files/helpers/regex.py @@ -52,6 +52,8 @@ mute_regex = re.compile("\/mute @?([\w-]{1,30}) ([0-9]+)", flags=re.A|re.I) emoji_regex = re.compile(f"

\s*(:[!#@\w\-]{{1,72}}:\s*)+<\/p>", flags=re.A) emoji_regex2 = re.compile(f'(?|[^`]*`))', flags=re.A) +marseyfx_emoji_regex = re.compile(':[\w#!].{0,98}?[^\\\\]:', flags=re.A) + snappy_url_regex = re.compile('(.+?)<\/a>', flags=re.A) email_regex = re.compile('[A-Za-z0-9._%+-]{1,64}@[A-Za-z0-9.-]{2,63}\.[A-Za-z]{2,63}', flags=re.A) @@ -118,7 +120,15 @@ twitch_regex = re.compile('(https:\/\/)?(www\.)?twitch.tv\/(.*)', flags=re.I|re. link_fix_regex = re.compile("(\[.*?\]\()(?!http|\/)(.*?\))" + NOT_IN_CODE_OR_LINKS, flags=re.A) -css_url_regex = re.compile('url\([\'"]?((.|\n)*?)[",);}$]', flags=re.I|re.A) # AEVANN, DO NOT TOUCH THIS, IT WENT THROUGH A MILLION ITERATIONS, IT'S PERFECT NOW +css_url_regex = re.compile('url\([\'"]?((.|\n)*?)[",);}$]', flags=re.I|re.A) # AEVANN, DO NOT TOUCH THIS, IT WENT THROUGH A MILLION ITERATIONS, IT'S PERFECT NOW <-- you probably dont actually need this anymore lol (CSP covers it) +css_style_attr_regex = re.compile('\s*([\w-]+?)\s*:((".*?"|\'.*?\'|\(.*?\)|{.*?}|\[.*?]|[^;])*);?', flags=re.I|re.A) +""" +CSS style attribute regex explanation: +Each match is one declaration. (Example: "color: red;") +Capture groups: +1. The property name (Example: "color") +2. 
The value, excluding the trailing ";", but including whitespace (Example: " red") +""" linefeeds_regex = re.compile("([^\n])\n([^\n])", flags=re.A) diff --git a/files/helpers/sanitize.py b/files/helpers/sanitize.py index 3ed11a8a5..9c5d26b41 100644 --- a/files/helpers/sanitize.py +++ b/files/helpers/sanitize.py @@ -4,8 +4,11 @@ import re import signal from functools import partial from os import path, listdir +from typing_extensions import deprecated from urllib.parse import parse_qs, urlparse, unquote, ParseResult, urlencode, urlunparse import time +from files.helpers.marseyfx.parser import parse_emoji +from files.helpers.marseyfx.tokenizer import Tokenizer from sqlalchemy.sql import func @@ -81,6 +84,7 @@ def allowed_attributes(tag, name, value): if name in {'alt','title'}: return True if name == 'class' and value == 'img': return True if name == 'data-user-submitted' and not value: return True + if name == 'data-kind' and value in EMOJI_KINDS: return True if tag == 'lite-youtube': if name == 'params' and value.startswith('autoplay=1&modestbranding=1'): return True @@ -126,7 +130,7 @@ def build_url_re(tlds, protocols): """ return re.compile( r"""\(*# Match any opening parentheses. - \b(?"]*)? 
# /path/zz (excluding "unsafe" chars from RFC 1738, @@ -271,8 +275,59 @@ def find_all_emote_endings(word): return endings, word +class RenderEmojisResult: + emojis_used: set[str] + heavy_count = 0 + tags: list[str] -def render_emoji(html, regexp, golden, emojis_used, b=False, is_title=False): + def __init__(self): + self.emojis_used = set() + self.tags = [] + + def update(self, other): + self.emojis_used |= other.emojis_used + self.heavy_count += other.heavy_count + self.tags.extend(other.tags) + +def render_emojis(markup: str, **kwargs): + result = RenderEmojisResult() + last_match_end = 0 + + golden = kwargs.get('golden', True) + permit_big = kwargs.get('permit_big', True) + + for emoji_match in marseyfx_emoji_regex.finditer(markup): + previous_text = markup[last_match_end:emoji_match.start()] + if previous_text != '': + result.tags.append(previous_text) + last_match_end = emoji_match.end() + + emoji_str = emoji_match.group()[1:-1] # Cut off colons + + tokenizer = Tokenizer(emoji_str) + success, emoji, _ = parse_emoji(tokenizer) + if success: + result.emojis_used.add(emoji.name) + + if not permit_big: + emoji.is_big = False + + emoji_html = emoji.create_el(tokenizer) + result.tags.append(emoji_html) + + if len(tokenizer.errors) > 0: + soup = BeautifulSoup() + err_tag = soup.new_tag('pre', attrs={'class': 'marseyfx-error'}) + nl = "\n " + err_tag.string = 'MarseyFX error:' + nl + nl.join(map(str,tokenizer.errors)) + result.tags.append(err_tag) + #result.tags.append(f':{emoji_str}:') + + result.tags.append(markup[last_match_end:]) + return result + +@deprecated("Use the new one") +def old_render_emoji(html, regexp, golden, emojis_used, b=False, is_title=False): emojis = list(regexp.finditer(html)) captured = set() @@ -323,6 +378,10 @@ def render_emoji(html, regexp, golden, emojis_used, b=False, is_title=False): if(is_loved): modifier_html = f'{modifier_html}{loved_html}' + kind = g.db.query(Emoji.kind).filter(Emoji.name==emoji).one_or_none() + if kind: + attrs += ' 
data-kind="' + kind[0].replace('"', '') + '"' + if (is_patted and emoji != 'marseyunpettable') or is_talking or is_genocided or is_loved: if path.isfile(f"files/assets/images/emojis/{emoji}.webp"): emoji_html = f'{modifier_html}{emoji_partial_pat.format(old, f"{SITE_FULL_IMAGES}/e/{emoji}.webp", attrs)}' @@ -529,40 +588,11 @@ def sanitize(sanitized, golden=True, limit_pings=0, showmore=False, count_emojis sanitized = spoiler_regex.sub(r'\1', sanitized) - emojis_used = set() - - emojis = list(emoji_regex.finditer(sanitized)) - if len(emojis) > 20: golden = False - - captured = [] - for i in emojis: - if i.group(0) in captured: continue - captured.append(i.group(0)) - - old = i.group(0) - if 'marseylong1' in old or 'marseylong2' in old or 'marseylongcockandballs' in old or 'marseyllama1' in old or 'marseyllama2' in old: - new = old.lower().replace(">", " class='mb-0'>") - else: new = old.lower() - - new = render_emoji(new, emoji_regex2, golden, emojis_used, True) - - sanitized = sanitized.replace(old, new) - - emojis = list(emoji_regex2.finditer(sanitized)) - if len(emojis) > 20: golden = False - - sanitized = render_emoji(sanitized, emoji_regex2, golden, emojis_used) - sanitized = sanitized.replace('&','&') sanitized = video_sub_regex.sub(r'

', sanitized) sanitized = audio_sub_regex.sub(r'', sanitized) - if count_emojis: - for emoji in g.db.query(Emoji).filter(Emoji.submitter_id==None, Emoji.name.in_(emojis_used)): - emoji.count += 1 - g.db.add(emoji) - sanitized = sanitized.replace('

', '') allowed_css_properties = allowed_styles.copy() @@ -578,7 +608,8 @@ def sanitize(sanitized, golden=True, limit_pings=0, showmore=False, count_emojis parse_email=False, url_re=url_re)] ).clean(sanitized) - #doing this here cuz of the linkifyfilter right above it (therefore unifying all link processing logic) + + #doing this here cuz of the linkifyfilter right above it (therefore unifying all link processing logic) <-- i have no clue what this means lol soup = BeautifulSoup(sanitized, 'lxml') has_transform = bool(soup.select('[style*=transform i]')) @@ -662,9 +693,6 @@ def sanitize(sanitized, golden=True, limit_pings=0, showmore=False, count_emojis html = f'

{html}

' sanitized = sanitized.replace(i.group(0), html) - if '
' not in sanitized and blackjack != "rules":
-		sanitized = sanitized.replace('\n','')
-
 	if showmore:
 		# Insert a show more button if the text is too long or has too many paragraphs
 		CHARLIMIT = 3000
@@ -710,9 +738,10 @@ def filter_emojis_only(title, golden=True, count_emojis=False):
 
 	title = remove_cuniform(title)
 
-	emojis_used = set()
+	res = render_emojis(title) #old_render_emoji(title, emoji_regex2, golden, emojis_used, is_title=True)
+	emojis_used = res.emojis_used #keep this name bound: the count_emojis query below still reads it
 
-	title = render_emoji(title, emoji_regex2, golden, emojis_used, is_title=True)
+	title = ''.join(map(str, res.tags))
 
 	if count_emojis:
 		for emoji in g.db.query(Emoji).filter(Emoji.submitter_id==None, Emoji.name.in_(emojis_used)):
diff --git a/files/helpers/slurs_and_profanities.py b/files/helpers/slurs_and_profanities.py
index 37cd3ef33..7e6c5546e 100644
--- a/files/helpers/slurs_and_profanities.py
+++ b/files/helpers/slurs_and_profanities.py
@@ -11,6 +11,12 @@ def replace_train_html(body):
 	body = body.replace(troon, ':marseytrain2:')
 	return body
 
+trans_valid_copypasta = """You are a real woman. You have no womb, you have no ovaries, you have no eggs, but that doesn't matter. You are a valid human who is trying to feel comfortable in her body.
+All the “validation” you get is pure. Behind your back people love you. Your parents are happy and proud of you, your friends laugh at your jokes behind closed doors, and boys love you, and girls envy you.
+Men absolutely love you. Trans folk who “pass” look ordinary and natural to a man. Your bone structure does not matter. Estradiol widens the hips.
+You will be happy. You will smile every single morning and tell yourself it’s going to be ok, and deep inside you feel the euphoria creeping up like a weed. It is what defines you, not the transphobes.
+Eventually, it’ll be perfect for you - you’ll come out, start HRT, get top surgery, and finally be your ideal self. Your parents will find you, happy and relieved that they finally have a happy daughter. They’ll congratulate you on your hard journey, and every passerby for the rest of eternity will know a woman is what you are."""
+
 SLURS = {
 	"tranny": tranny,
 	"trannie": trannie,
@@ -27,12 +33,16 @@ SLURS = {
 	"(?