transforms are back baby

pull/207/head
transbitch 2023-09-18 01:40:02 -04:00
parent 07448a18f4
commit 59d499237d
6 changed files with 90 additions and 60 deletions

View File

@ -54,7 +54,8 @@ DELETE_EDIT_RATELIMIT = "10/minute;50/day"
PUSH_NOTIF_LIMIT = 1000 PUSH_NOTIF_LIMIT = 1000
IS_LOCALHOST = SITE == "localhost" or SITE == "127.0.0.1" or SITE.startswith("192.168.") or SITE.endswith(".local") IS_LOCALHOST = SITE.startswith("localhost:") or SITE.startswith("127.0.0.1") or SITE.startswith("192.168.") or SITE.endswith(".local")
print(f"IS_LOCALHOST: {IS_LOCALHOST}")
if IS_LOCALHOST: if IS_LOCALHOST:
SITE_FULL = 'http://' + SITE SITE_FULL = 'http://' + SITE
@ -1073,9 +1074,10 @@ engine = create_engine(environ.get("DATABASE_URL").strip(), connect_args={"optio
db_session = scoped_session(sessionmaker(bind=engine, autoflush=False)) db_session = scoped_session(sessionmaker(bind=engine, autoflush=False))
approved_embed_hosts_for_csp = ' '.join(set([x.split('/')[0] for x in approved_embed_hosts])) approved_embed_hosts_for_csp = ' '.join(set([x.split('/')[0] for x in approved_embed_hosts]))
csp = f"default-src 'none'; frame-ancestors 'none'; form-action 'self'; manifest-src 'self'; worker-src 'self'; base-uri 'self'; font-src 'self'; style-src-elem 'self'; style-src-attr 'unsafe-inline'; style-src 'self' 'unsafe-inline'; script-src-elem 'self' challenges.cloudflare.com; script-src-attr 'none'; script-src 'self' challenges.cloudflare.com; frame-src challenges.cloudflare.com www.youtube-nocookie.com platform.twitter.com rumble.com player.twitch.tv; connect-src 'self' videos.watchpeopledie.tv use1.fptls.com use1.fptls3.com api.fpjs.io; img-src {approved_embed_hosts_for_csp} data:; media-src {approved_embed_hosts_for_csp};" csp = f"default-src 'none'; frame-ancestors 'none'; form-action 'self'; manifest-src 'self'; worker-src 'self'; base-uri 'self'; font-src 'self'; style-src-elem 'self'; style-src-attr 'unsafe-inline'; style-src 'self' 'unsafe-inline'; script-src-elem 'self' challenges.cloudflare.com; script-src-attr 'none'; script-src 'self' challenges.cloudflare.com; frame-src challenges.cloudflare.com www.youtube-nocookie.com platform.twitter.com rumble.com player.twitch.tv; connect-src 'self' videos.watchpeopledie.tv use1.fptls.com use1.fptls3.com api.fpjs.io; img-src 'self' {approved_embed_hosts_for_csp} data:; media-src 'self' {approved_embed_hosts_for_csp};"
if not IS_LOCALHOST: if not IS_LOCALHOST:
csp += ' upgrade-insecure-requests;' csp += ' upgrade-insecure-requests;'
with open("includes/content-security-policy", "w") as f: with open("includes/content-security-policy", "w") as f:
f.write(f'add_header Content-Security-Policy "{csp}";') f.write(f'add_header Content-Security-Policy "{csp}";')

View File

@ -1,15 +1,24 @@
import re
from bs4 import BeautifulSoup, Tag from bs4 import BeautifulSoup, Tag
from files.helpers.config.const import SITE_FULL_IMAGES from files.helpers.config.const import SITE_FULL_IMAGES
from files.helpers.marseyfx.parser import Modifier from files.helpers.marseyfx.tokenizer import StringLiteralToken, Token
from files.helpers.marseyfx.tokenizer import StringLiteralToken
modifier_whitelist = [] modifier_whitelist = []
class Modifier:
name: str
args: list[Token]
def __init__(self, name: str, args: list[Token]):
self.name = name
self.args = args
def modifier(fn): def modifier(fn):
modifier_whitelist.append(fn.__name__) modifier_whitelist.append(fn.__name__)
def wrapper(*args, **kwargs): def wrapper(*args, **kwargs):
args[0].el['class'].append('marseyfx-modifier-' + fn.__name__) slf = args[0]
slf.el = slf.el.wrap(slf.soup.new_tag('div', attrs={'class': f'marseyfx-modifier marseyfx-modifier-{fn.__name__}'}))
return fn(*args, **kwargs) return fn(*args, **kwargs)
return wrapper return wrapper
@ -19,7 +28,7 @@ class Modified:
def __init__(self, el): def __init__(self, el):
self.soup = BeautifulSoup() self.soup = BeautifulSoup()
self.el = el.wrap(self.soup.new_tag('div', class_='marseyfx-container')) self.el = el
def add_class(self, class_: str): def add_class(self, class_: str):
self.el.attrs['class'].append(' ' + class_) self.el.attrs['class'].append(' ' + class_)
@ -32,12 +41,20 @@ class Modified:
# Using this instead of throwing everything in a string and then parsing it helps # Using this instead of throwing everything in a string and then parsing it helps
# mitigate the risk of XSS attacks # mitigate the risk of XSS attacks
def image(self, name: str): def image(self, name: str):
return self.soup.new_tag( image = self.soup.new_tag(
'img', 'img',
loading='lazy', loading='lazy',
class_=f'marseyfx-{name}', src=f'{SITE_FULL_IMAGES}/i/{name}.webp',
src=f'{SITE_FULL_IMAGES}/i/{name}.webp' attrs={'class': f'marseyfx-image marseyfx-image-{name}'}
) )
container = self.soup.new_tag(
'div',
attrs={'class': f'marseyfx-image-container marseyfx-image-container-{name}'}
)
container.append(image)
return container
def underlay(self, underlay: Tag): def underlay(self, underlay: Tag):
self.el.insert(0, underlay) self.el.insert(0, underlay)
@ -47,16 +64,16 @@ class Modified:
@modifier @modifier
def pat(self): def pat(self):
self.overlay(self.el, self.image('pat')) self.overlay(self.image('pat'))
@modifier @modifier
def love(self): def love(self):
self.overlay(self.el, self.image('love-foreground')) self.overlay(self.image('love-foreground'))
self.underlay(self.el, self.image('love-background')) self.underlay(self.image('love-background'))
@modifier @modifier
def talking(self): def talking(self):
self.overlay(self.el, self.image('talking')) self.overlay(self.image('talking'))
@modifier @modifier
def genocide(self): def genocide(self):
@ -67,35 +84,34 @@ class Modified:
if not isinstance(msg, StringLiteralToken): if not isinstance(msg, StringLiteralToken):
return return
self.overlay(self.el, self.image('says')) self.overlay(self.image('says'))
self.el.append(self.soup.new_tag( self.el.append(self.soup.new_tag(
'span', 'span',
class_='marseyfx-modifier-says-text', string=msg.value,
string=msg.value attrs={'class': 'marseyfx-modifier-says-text'}
)) ))
@modifier @modifier
def fallover(self): def fallover(self):
self.el.wrap(self.soup.new_tag( self.el = self.el.wrap(self.soup.new_tag(
'div', 'div',
class_='marseyfx-modifier-fallover-container' attrs={'class': 'marseyfx-modifier-fallover-container'}
)) ))
@modifier @modifier
def transform(self, transformstyle: str): def transform(self, transformstyle: StringLiteralToken):
if not transformstyle.fullmatch(r'[\w()\s%\.]*'): if not re.fullmatch(r'[\w()\s%\.,]*', transformstyle.value):
print(f'Evil transform detected: {transformstyle.value}')
return return
if not 'style' in self.el.attrs: self.el.attrs['style'] = f'transform: {transformstyle.value};'
self.el.attrs['style'] = ''
self.el.attrs['style'] += f'transform: {transformstyle};'
@modifier @modifier
def enraged(self): def enraged(self):
self.underlay(self.soup.new_tag( self.underlay(self.soup.new_tag(
'div', 'div',
class_='marseyfx-enraged-underlay' attrs={'class': 'marseyfx-enraged-underlay'}
)) ))
@modifier @modifier

View File

@ -3,15 +3,7 @@ from tokenize import Token
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
from files.helpers.config.const import SITE_FULL_IMAGES from files.helpers.config.const import SITE_FULL_IMAGES
from files.helpers.marseyfx.tokenizer import ArgsToken, DotToken, GroupToken, Tokenizer, WordToken from files.helpers.marseyfx.tokenizer import ArgsToken, DotToken, GroupToken, Tokenizer, WordToken
from modified import Modified from files.helpers.marseyfx.modifiers import Modified, Modifier
class Modifier:
name: str
args: list[Token]
def __init__(self, name: str, args: list[Token]):
self.name = name
self.args = args
emoji_replacers = { emoji_replacers = {
'!': 'is_flipped', '!': 'is_flipped',
@ -29,41 +21,49 @@ class Emoji:
def __init__(self, name: str, modifiers, token: Token): def __init__(self, name: str, modifiers, token: Token):
for symbol, value in emoji_replacers.items(): for symbol, value in emoji_replacers.items():
name = name.replace(symbol, '') if symbol in name:
setattr(self, value, True) name = name.replace(symbol, '')
setattr(self, value, True)
self.name = name self.name = name
self.modifiers = modifiers self.modifiers = modifiers
self.token = token self.token = token
def create_el(self): def create_el(self):
soup = BeautifulSoup() soup = BeautifulSoup()
el = soup.new_tag( el = soup.new_tag(
'img', 'img',
loading='lazy', loading='lazy',
class_='marseyfx-emoji', src=f'{SITE_FULL_IMAGES}/e/{self.name}.webp',
src=f'{SITE_FULL_IMAGES}/e/{self.name}.webp' attrs={'class': f'marseyfx-emoji marseyfx-image'}
)
soup.append(el)
el = el.wrap(
soup.new_tag('div', attrs={'class': 'marseyfx-emoji-container'})
) )
if (self.is_big):
el['class'].append(' marseyfx-big')
if (self.is_flipped):
el['class'].append(' marseyfx-flipped')
mod = Modified(el) mod = Modified(el)
mod.apply_modifiers(self.modifiers) mod.apply_modifiers(self.modifiers)
return mod.el container = soup.new_tag('div', attrs={'class': 'marseyfx-container'})
if (self.is_big):
container['class'].append(' marseyfx-big')
if (self.is_flipped):
container['class'].append(' marseyfx-flipped')
return mod.el.wrap(container)
def parse_emoji(str: str): def parse_emoji(str: str):
tokenizer = Tokenizer(str) tokenizer = Tokenizer(str)
token = tokenizer.parse_next_tokens() token = tokenizer.parse_next_tokens()
if len(tokenizer.errors) > 0: if len(tokenizer.errors) > 0 or token is None:
return False, None, token return False, None, token
emoji = parse_from_token(tokenizer, token) emoji = parse_from_token(tokenizer, token)
print(f'Here! {emoji}')
if not emoji: if not emoji:
return False, None, token return False, None, token
@ -75,7 +75,7 @@ def parse_from_token(tokenizer: Tokenizer, token: GroupToken):
tokenizer.error('Malformed token -- Expected a group token') tokenizer.error('Malformed token -- Expected a group token')
return return
emoji = token.tokens[0] emoji = token.children[0]
if not isinstance(emoji, WordToken): if not isinstance(emoji, WordToken):
tokenizer.error('Malformed token -- Expected an emoji (word token)') tokenizer.error('Malformed token -- Expected an emoji (word token)')
@ -84,24 +84,24 @@ def parse_from_token(tokenizer: Tokenizer, token: GroupToken):
modifiers = [] modifiers = []
i = 1 i = 1
while i + 1 < len(token.tokens): while i + 1 < len(token.children):
t = token.tokens[i] t = token.children[i]
if not isinstance(t, DotToken): if not isinstance(t, DotToken):
tokenizer.error('Malformed token -- Expected a dot') tokenizer.error('Malformed token -- Expected a dot')
return return
modifier = token.tokens[i + 1] modifier = token.children[i + 1]
if not isinstance(modifier, WordToken): if not isinstance(modifier, WordToken):
tokenizer.error('Malformed token -- Expected a modifier name (word token)') tokenizer.error('Malformed token -- Expected a modifier name (word token)')
return return
if not i + 2 < len(token.tokens) or not isinstance(token.tokens[i + 2], ArgsToken): if not i + 2 < len(token.children) or not isinstance(token.children[i + 2], ArgsToken):
modifiers.append(Modifier(modifier.value, [])) modifiers.append(Modifier(modifier.value, []))
i += 2 i += 2
else: else:
args = token.tokens[i + 2] args = token.children[i + 2]
modifiers.append(Modifier(modifier.value, args.tokens)) modifiers.append(Modifier(modifier.value, args.children))
i += 3 i += 3
return Emoji(emoji.value, modifiers, token) return Emoji(emoji.value, modifiers, token)

View File

@ -1,4 +1,5 @@
from abc import abstractmethod from abc import abstractmethod
import re
class TokenizerError: class TokenizerError:
index: int index: int
@ -39,6 +40,7 @@ class Tokenizer:
return self.str[token.span[0]:token.span[1]] return self.str[token.span[0]:token.span[1]]
def parse_next_tokens(self): def parse_next_tokens(self):
print(self.str[self.index:])
start = self.index start = self.index
tokens = [] tokens = []
while self.has_next(): while self.has_next():
@ -48,9 +50,18 @@ class Tokenizer:
tokens.append(DotToken.parse(self)) tokens.append(DotToken.parse(self))
elif ArgsToken.can_parse(self): elif ArgsToken.can_parse(self):
tokens.append(ArgsToken.parse(self)) tokens.append(ArgsToken.parse(self))
elif StringLiteralToken.can_parse(self):
tokens.append(StringLiteralToken.parse(self))
else: else:
break break
if len(tokens) == 0:
self.error('Expected a token')
return None
if len(tokens) == 1:
return tokens[0]
return GroupToken((start, self.index), tokens) return GroupToken((start, self.index), tokens)
class Token: class Token:
@ -75,7 +86,7 @@ class WordToken(Token):
@staticmethod @staticmethod
def can_parse(tokenizer: Tokenizer): def can_parse(tokenizer: Tokenizer):
return tokenizer.peek().fullmatch(r'[!#\w@]') return re.fullmatch(r'[!#\w@]', tokenizer.peek())
@staticmethod @staticmethod
def parse(tokenizer: Tokenizer): def parse(tokenizer: Tokenizer):
@ -129,7 +140,7 @@ class NumberLiteralToken(Token):
@staticmethod @staticmethod
def can_parse(tokenizer: Tokenizer): def can_parse(tokenizer: Tokenizer):
return tokenizer.peek().fullmatch(r'[-\d\.]') return re.fullmatch(r'[-\d\.]', tokenizer.peek())
@staticmethod @staticmethod
def parse(tokenizer: Tokenizer): def parse(tokenizer: Tokenizer):
@ -193,6 +204,7 @@ class ArgsToken(Token):
elif tokenizer.peek() == ',': elif tokenizer.peek() == ',':
tokenizer.eat() tokenizer.eat()
else: else:
tokenizer.eat()
tokens.append(tokenizer.parse_next_tokens()) tokens.append(tokenizer.parse_next_tokens())
return ArgsToken((start, tokenizer.index), tokens) return ArgsToken((start, tokenizer.index), tokens)

View File

@ -52,7 +52,7 @@ mute_regex = re.compile("\/mute @?([\w-]{1,30}) ([0-9]+)", flags=re.A|re.I)
emoji_regex = re.compile(f"<p>\s*(:[!#@\w\-]{{1,72}}:\s*)+<\/p>", flags=re.A) emoji_regex = re.compile(f"<p>\s*(:[!#@\w\-]{{1,72}}:\s*)+<\/p>", flags=re.A)
emoji_regex2 = re.compile(f'(?<!"):([!#@\w\-]{{1,72}}?):(?!([^<]*<\/(code|pre)>|[^`]*`))', flags=re.A) emoji_regex2 = re.compile(f'(?<!"):([!#@\w\-]{{1,72}}?):(?!([^<]*<\/(code|pre)>|[^`]*`))', flags=re.A)
marseyfx_emoji_regex = re.compile(':[^\s].{0,98}?[^\\]:', flags=re.A) marseyfx_emoji_regex = re.compile(':[^\s].{0,98}?[^\\\\]:', flags=re.A)
snappy_url_regex = re.compile('<a href="(https?:\/\/.+?)".*?>(.+?)<\/a>', flags=re.A) snappy_url_regex = re.compile('<a href="(https?:\/\/.+?)".*?>(.+?)<\/a>', flags=re.A)

View File

@ -279,7 +279,7 @@ def render_emojis(markup: str):
for emoji_match in marseyfx_emoji_regex.finditer(markup): for emoji_match in marseyfx_emoji_regex.finditer(markup):
emoji_str = emoji_match.group()[1:-1] # Cut off colons emoji_str = emoji_match.group()[1:-1] # Cut off colons
success, emoji = parse_emoji(emoji_str) success, emoji, _ = parse_emoji(emoji_str)
if success: if success:
emojis_used.add(emoji.name) emojis_used.add(emoji.name)
emoji_html = str(emoji.create_el()) emoji_html = str(emoji.create_el())
@ -549,8 +549,6 @@ def sanitize(sanitized, golden=True, limit_pings=0, showmore=False, count_emojis
sanitized = spoiler_regex.sub(r'<spoiler>\1</spoiler>', sanitized) sanitized = spoiler_regex.sub(r'<spoiler>\1</spoiler>', sanitized)
santiized, emojis_used = render_emojis(sanitized)
sanitized = sanitized.replace('&amp;','&') sanitized = sanitized.replace('&amp;','&')
sanitized = video_sub_regex.sub(r'<p class="resizable"><video controls preload="none" src="\1"></video></p>', sanitized) sanitized = video_sub_regex.sub(r'<p class="resizable"><video controls preload="none" src="\1"></video></p>', sanitized)
@ -576,6 +574,8 @@ def sanitize(sanitized, golden=True, limit_pings=0, showmore=False, count_emojis
parse_email=False, url_re=url_re)] parse_email=False, url_re=url_re)]
).clean(sanitized) ).clean(sanitized)
sanitized, emojis_used = render_emojis(sanitized)
#doing this here cuz of the linkifyfilter right above it (therefore unifying all link processing logic) #doing this here cuz of the linkifyfilter right above it (therefore unifying all link processing logic)
soup = BeautifulSoup(sanitized, 'lxml') soup = BeautifulSoup(sanitized, 'lxml')