diff --git a/files/helpers/config/const.py b/files/helpers/config/const.py index 83f7b7edb..48ec9caf0 100644 --- a/files/helpers/config/const.py +++ b/files/helpers/config/const.py @@ -54,7 +54,8 @@ DELETE_EDIT_RATELIMIT = "10/minute;50/day" PUSH_NOTIF_LIMIT = 1000 -IS_LOCALHOST = SITE == "localhost" or SITE == "127.0.0.1" or SITE.startswith("192.168.") or SITE.endswith(".local") +IS_LOCALHOST = SITE.startswith("localhost:") or SITE.startswith("127.0.0.1") or SITE.startswith("192.168.") or SITE.endswith(".local") +print(f"IS_LOCALHOST: {IS_LOCALHOST}") if IS_LOCALHOST: SITE_FULL = 'http://' + SITE @@ -1073,9 +1074,10 @@ engine = create_engine(environ.get("DATABASE_URL").strip(), connect_args={"optio db_session = scoped_session(sessionmaker(bind=engine, autoflush=False)) approved_embed_hosts_for_csp = ' '.join(set([x.split('/')[0] for x in approved_embed_hosts])) -csp = f"default-src 'none'; frame-ancestors 'none'; form-action 'self'; manifest-src 'self'; worker-src 'self'; base-uri 'self'; font-src 'self'; style-src-elem 'self'; style-src-attr 'unsafe-inline'; style-src 'self' 'unsafe-inline'; script-src-elem 'self' challenges.cloudflare.com; script-src-attr 'none'; script-src 'self' challenges.cloudflare.com; frame-src challenges.cloudflare.com www.youtube-nocookie.com platform.twitter.com rumble.com player.twitch.tv; connect-src 'self' videos.watchpeopledie.tv use1.fptls.com use1.fptls3.com api.fpjs.io; img-src {approved_embed_hosts_for_csp} data:; media-src {approved_embed_hosts_for_csp};" +csp = f"default-src 'none'; frame-ancestors 'none'; form-action 'self'; manifest-src 'self'; worker-src 'self'; base-uri 'self'; font-src 'self'; style-src-elem 'self'; style-src-attr 'unsafe-inline'; style-src 'self' 'unsafe-inline'; script-src-elem 'self' challenges.cloudflare.com; script-src-attr 'none'; script-src 'self' challenges.cloudflare.com; frame-src challenges.cloudflare.com www.youtube-nocookie.com platform.twitter.com rumble.com player.twitch.tv; connect-src 'self' videos.watchpeopledie.tv use1.fptls.com use1.fptls3.com api.fpjs.io; img-src 'self' {approved_embed_hosts_for_csp} data:; media-src 'self' {approved_embed_hosts_for_csp};" if not IS_LOCALHOST: csp += ' upgrade-insecure-requests;' + with open("includes/content-security-policy", "w") as f: - f.write(f'add_header Content-Security-Policy "{csp}";') + f.write(f'add_header Content-Security-Policy "{csp}";') \ No newline at end of file diff --git a/files/helpers/marseyfx/modifiers.py b/files/helpers/marseyfx/modifiers.py index 37c44f993..e8d0623de 100644 --- a/files/helpers/marseyfx/modifiers.py +++ b/files/helpers/marseyfx/modifiers.py @@ -1,15 +1,24 @@ +import re from bs4 import BeautifulSoup, Tag from files.helpers.config.const import SITE_FULL_IMAGES -from files.helpers.marseyfx.parser import Modifier -from files.helpers.marseyfx.tokenizer import StringLiteralToken +from files.helpers.marseyfx.tokenizer import StringLiteralToken, Token modifier_whitelist = [] +class Modifier: + name: str + args: list[Token] + + def __init__(self, name: str, args: list[Token]): + self.name = name + self.args = args + def modifier(fn): modifier_whitelist.append(fn.__name__) def wrapper(*args, **kwargs): - args[0].el['class'].append('marseyfx-modifier-' + fn.__name__) + slf = args[0] + slf.el = slf.el.wrap(slf.soup.new_tag('div', attrs={'class': f'marseyfx-modifier marseyfx-modifier-{fn.__name__}'})) return fn(*args, **kwargs) return wrapper @@ -19,7 +28,7 @@ class Modified: def __init__(self, el): self.soup = BeautifulSoup() - self.el = el.wrap(self.soup.new_tag('div', class_='marseyfx-container')) + self.el = el def add_class(self, class_: str): self.el.attrs['class'].append(' ' + class_) @@ -32,12 +41,20 @@ class Modified: # Using this instead of throwing everything in a string and then parsing it helps # mitigate the risk of XSS attacks def image(self, name: str): - return self.soup.new_tag( + image = self.soup.new_tag( 'img', loading='lazy', - class_=f'marseyfx-{name}', - src=f'{SITE_FULL_IMAGES}/i/{name}.webp' - ) + src=f'{SITE_FULL_IMAGES}/i/{name}.webp', + attrs={'class': f'marseyfx-image marseyfx-image-{name}'} + ) + + container = self.soup.new_tag( + 'div', + attrs={'class': f'marseyfx-image-container marseyfx-image-container-{name}'} + ) + + container.append(image) + return container def underlay(self, underlay: Tag): self.el.insert(0, underlay) @@ -47,16 +64,16 @@ class Modified: @modifier def pat(self): - self.overlay(self.el, self.image('pat')) + self.overlay(self.image('pat')) @modifier def love(self): - self.overlay(self.el, self.image('love-foreground')) - self.underlay(self.el, self.image('love-background')) + self.overlay(self.image('love-foreground')) + self.underlay(self.image('love-background')) @modifier def talking(self): - self.overlay(self.el, self.image('talking')) + self.overlay(self.image('talking')) @modifier def genocide(self): @@ -67,35 +84,34 @@ class Modified: if not isinstance(msg, StringLiteralToken): return - self.overlay(self.el, self.image('says')) + self.overlay(self.image('says')) self.el.append(self.soup.new_tag( 'span', - class_='marseyfx-modifier-says-text', - string=msg.value + string=msg.value, + attrs={'class': 'marseyfx-modifier-says-text'} )) @modifier def fallover(self): - self.el.wrap(self.soup.new_tag( + self.el = self.el.wrap(self.soup.new_tag( 'div', - class_='marseyfx-modifier-fallover-container' + attrs={'class': 'marseyfx-modifier-fallover-container'} )) @modifier - def transform(self, transformstyle: str): - if not transformstyle.fullmatch(r'[\w()\s%\.]*'): + def transform(self, transformstyle: StringLiteralToken): + if not re.fullmatch(r'[\w()\s%\.,]*', transformstyle.value): + print(f'Evil transform detected: {transformstyle.value}') return - if not 'style' in self.el.attrs: - self.el.attrs['style'] = '' + self.el.attrs['style'] = f'transform: {transformstyle.value};' - self.el.attrs['style'] += f'transform: {transformstyle};' @modifier def enraged(self): self.underlay(self.soup.new_tag( 'div', - class_='marseyfx-enraged-underlay' + attrs={'class': 'marseyfx-enraged-underlay'} )) @modifier diff --git a/files/helpers/marseyfx/parser.py b/files/helpers/marseyfx/parser.py index 04f936da2..b567fb44c 100644 --- a/files/helpers/marseyfx/parser.py +++ b/files/helpers/marseyfx/parser.py @@ -3,15 +3,7 @@ from tokenize import Token from bs4 import BeautifulSoup from files.helpers.config.const import SITE_FULL_IMAGES from files.helpers.marseyfx.tokenizer import ArgsToken, DotToken, GroupToken, Tokenizer, WordToken -from modified import Modified - -class Modifier: - name: str - args: list[Token] - - def __init__(self, name: str, args: list[Token]): - self.name = name - self.args = args +from files.helpers.marseyfx.modifiers import Modified, Modifier emoji_replacers = { '!': 'is_flipped', @@ -29,41 +21,49 @@ class Emoji: def __init__(self, name: str, modifiers, token: Token): for symbol, value in emoji_replacers.items(): - name = name.replace(symbol, '') - setattr(self, value, True) + if symbol in name: + name = name.replace(symbol, '') + setattr(self, value, True) self.name = name self.modifiers = modifiers self.token = token + def create_el(self): soup = BeautifulSoup() el = soup.new_tag( 'img', loading='lazy', - class_='marseyfx-emoji', - src=f'{SITE_FULL_IMAGES}/e/{self.name}.webp' + src=f'{SITE_FULL_IMAGES}/e/{self.name}.webp', + attrs={'class': f'marseyfx-emoji marseyfx-image'} + ) + soup.append(el) + el = el.wrap( + soup.new_tag('div', attrs={'class': 'marseyfx-emoji-container'}) ) - - if (self.is_big): - el['class'].append(' marseyfx-big') - - if (self.is_flipped): - el['class'].append(' marseyfx-flipped') mod = Modified(el) mod.apply_modifiers(self.modifiers) - return mod.el + container = soup.new_tag('div', attrs={'class': 'marseyfx-container'}) + if (self.is_big): + container['class'].append(' marseyfx-big') + + if (self.is_flipped): + container['class'].append(' marseyfx-flipped') + + return mod.el.wrap(container) def parse_emoji(str: str): tokenizer = Tokenizer(str) token = tokenizer.parse_next_tokens() - if len(tokenizer.errors) > 0: + if len(tokenizer.errors) > 0 or token is None: return False, None, token emoji = parse_from_token(tokenizer, token) + print(f'Here! {emoji}') if not emoji: return False, None, token @@ -75,7 +75,7 @@ def parse_from_token(tokenizer: Tokenizer, token: GroupToken): tokenizer.error('Malformed token -- Expected a group token') return - emoji = token.tokens[0] + emoji = token.children[0] if not isinstance(emoji, WordToken): tokenizer.error('Malformed token -- Expected an emoji (word token)') @@ -84,24 +84,24 @@ def parse_from_token(tokenizer: Tokenizer, token: GroupToken): modifiers = [] i = 1 - while i + 1 < len(token.tokens): - t = token.tokens[i] + while i + 1 < len(token.children): + t = token.children[i] if not isinstance(t, DotToken): tokenizer.error('Malformed token -- Expected a dot') return - modifier = token.tokens[i + 1] + modifier = token.children[i + 1] if not isinstance(modifier, WordToken): tokenizer.error('Malformed token -- Expected a modifier name (word token)') return - if not i + 2 < len(token.tokens) or not isinstance(token.tokens[i + 2], ArgsToken): + if not i + 2 < len(token.children) or not isinstance(token.children[i + 2], ArgsToken): modifiers.append(Modifier(modifier.value, [])) i += 2 else: - args = token.tokens[i + 2] - modifiers.append(Modifier(modifier.value, args.tokens)) + args = token.children[i + 2] + modifiers.append(Modifier(modifier.value, args.children)) i += 3 return Emoji(emoji.value, modifiers, token) \ No newline at end of file diff --git a/files/helpers/marseyfx/tokenizer.py b/files/helpers/marseyfx/tokenizer.py index 6a01129f9..82e859b25 100644 --- a/files/helpers/marseyfx/tokenizer.py +++ b/files/helpers/marseyfx/tokenizer.py @@ -1,4 +1,5 @@ from abc import abstractmethod +import re class TokenizerError: index: int @@ -39,6 +40,7 @@ class Tokenizer: return self.str[token.span[0]:token.span[1]] def parse_next_tokens(self): + print(self.str[self.index:]) start = self.index tokens = [] while self.has_next(): @@ -48,9 +50,18 @@ class Tokenizer: tokens.append(DotToken.parse(self)) elif ArgsToken.can_parse(self): tokens.append(ArgsToken.parse(self)) + elif StringLiteralToken.can_parse(self): + tokens.append(StringLiteralToken.parse(self)) else: break + if len(tokens) == 0: + self.error('Expected a token') + return None + + if len(tokens) == 1: + return tokens[0] + return GroupToken((start, self.index), tokens) class Token: @@ -75,7 +86,7 @@ class WordToken(Token): @staticmethod def can_parse(tokenizer: Tokenizer): - return tokenizer.peek().fullmatch(r'[!#\w@]') + return re.fullmatch(r'[!#\w@]', tokenizer.peek()) @staticmethod def parse(tokenizer: Tokenizer): @@ -129,7 +140,7 @@ class NumberLiteralToken(Token): @staticmethod def can_parse(tokenizer: Tokenizer): - return tokenizer.peek().fullmatch(r'[-\d\.]') + return re.fullmatch(r'[-\d\.]', tokenizer.peek()) @staticmethod def parse(tokenizer: Tokenizer): @@ -193,6 +204,7 @@ class ArgsToken(Token): elif tokenizer.peek() == ',': tokenizer.eat() else: + tokenizer.eat() tokens.append(tokenizer.parse_next_tokens()) return ArgsToken((start, tokenizer.index), tokens) \ No newline at end of file diff --git a/files/helpers/regex.py b/files/helpers/regex.py index d0b0f16ea..f754ce7b9 100644 --- a/files/helpers/regex.py +++ b/files/helpers/regex.py @@ -52,7 +52,7 @@ mute_regex = re.compile("\/mute @?([\w-]{1,30}) ([0-9]+)", flags=re.A|re.I) emoji_regex = re.compile(f"

\s*(:[!#@\w\-]{{1,72}}:\s*)+<\/p>", flags=re.A) emoji_regex2 = re.compile(f'(?|[^`]*`))', flags=re.A) -marseyfx_emoji_regex = re.compile(':[^\s].{0,98}?[^\\]:', flags=re.A) +marseyfx_emoji_regex = re.compile(':[^\s].{0,98}?[^\\\\]:', flags=re.A) snappy_url_regex = re.compile('(.+?)<\/a>', flags=re.A) diff --git a/files/helpers/sanitize.py b/files/helpers/sanitize.py index 044ecea55..eb2b749b3 100644 --- a/files/helpers/sanitize.py +++ b/files/helpers/sanitize.py @@ -279,7 +279,7 @@ def render_emojis(markup: str): for emoji_match in marseyfx_emoji_regex.finditer(markup): emoji_str = emoji_match.group()[1:-1] # Cut off colons - success, emoji = parse_emoji(emoji_str) + success, emoji, _ = parse_emoji(emoji_str) if success: emojis_used.add(emoji.name) emoji_html = str(emoji.create_el()) @@ -549,8 +549,6 @@ def sanitize(sanitized, golden=True, limit_pings=0, showmore=False, count_emojis sanitized = spoiler_regex.sub(r'\1', sanitized) - santiized, emojis_used = render_emojis(sanitized) - sanitized = sanitized.replace('&','&') sanitized = video_sub_regex.sub(r'

', sanitized) @@ -576,6 +574,8 @@ def sanitize(sanitized, golden=True, limit_pings=0, showmore=False, count_emojis parse_email=False, url_re=url_re)] ).clean(sanitized) + sanitized, emojis_used = render_emojis(sanitized) + #doing this here cuz of the linkifyfilter right above it (therefore unifying all link processing logic) soup = BeautifulSoup(sanitized, 'lxml')