(.+?)<\/a>', flags=re.A)
email_regex = re.compile('[A-Za-z0-9._%+-]{1,64}@[A-Za-z0-9.-]{2,63}\.[A-Za-z]{2,63}', flags=re.A)
diff --git a/files/helpers/sanitize.py b/files/helpers/sanitize.py
index 3ed11a8a5..d418b846c 100644
--- a/files/helpers/sanitize.py
+++ b/files/helpers/sanitize.py
@@ -4,8 +4,10 @@ import re
import signal
from functools import partial
from os import path, listdir
+from typing_extensions import deprecated
from urllib.parse import parse_qs, urlparse, unquote, ParseResult, urlencode, urlunparse
import time
+from files.helpers.marseyfx.parser import parse_emoji
from sqlalchemy.sql import func
@@ -271,8 +273,21 @@ def find_all_emote_endings(word):
return endings, word
+def render_emojis(markup: str):
+ emojis_used = set()
-def render_emoji(html, regexp, golden, emojis_used, b=False, is_title=False):
+ for emoji_match in marseyfx_emoji_regex.finditer(markup):
+ emoji_str = emoji_match.group()[1:-1] # Cut off colons
+ success, emoji = parse_emoji(emoji_str)
+ if success:
+ emojis_used.add(emoji.name)
+ emoji_html = str(emoji.create_el())
+ markup = markup.replace(emoji_match.group(), emoji_html)
+
+ return markup, emojis_used
+
+@deprecated("Use the new one")
+def old_render_emoji(html, regexp, golden, emojis_used, b=False, is_title=False):
emojis = list(regexp.finditer(html))
captured = set()
@@ -529,29 +544,7 @@ def sanitize(sanitized, golden=True, limit_pings=0, showmore=False, count_emojis
sanitized = spoiler_regex.sub(r'\1', sanitized)
- emojis_used = set()
-
- emojis = list(emoji_regex.finditer(sanitized))
- if len(emojis) > 20: golden = False
-
- captured = []
- for i in emojis:
- if i.group(0) in captured: continue
- captured.append(i.group(0))
-
- old = i.group(0)
- if 'marseylong1' in old or 'marseylong2' in old or 'marseylongcockandballs' in old or 'marseyllama1' in old or 'marseyllama2' in old:
- new = old.lower().replace(">", " class='mb-0'>")
- else: new = old.lower()
-
- new = render_emoji(new, emoji_regex2, golden, emojis_used, True)
-
- sanitized = sanitized.replace(old, new)
-
- emojis = list(emoji_regex2.finditer(sanitized))
- if len(emojis) > 20: golden = False
-
- sanitized = render_emoji(sanitized, emoji_regex2, golden, emojis_used)
+ santiized, emojis_used = render_emojis(sanitized)
sanitized = sanitized.replace('&','&')
@@ -710,9 +703,7 @@ def filter_emojis_only(title, golden=True, count_emojis=False):
title = remove_cuniform(title)
- emojis_used = set()
-
- title = render_emoji(title, emoji_regex2, golden, emojis_used, is_title=True)
+ title, emojis_used = render_emojis(title) #old_render_emoji(title, emoji_regex2, golden, emojis_used, is_title=True)
if count_emojis:
for emoji in g.db.query(Emoji).filter(Emoji.submitter_id==None, Emoji.name.in_(emojis_used)):
--
2.34.1
From 07448a18f43a18091acc21c34bf86a4f6156eacb Mon Sep 17 00:00:00 2001
From: transbitch <>
Date: Tue, 29 Aug 2023 08:55:27 -0400
Subject: [PATCH 3/8] Add pixelated image rendering to Classic emojis
---
files/assets/css/main.css | 6 ++++--
files/helpers/sanitize.py | 5 +++++
2 files changed, 9 insertions(+), 2 deletions(-)
diff --git a/files/assets/css/main.css b/files/assets/css/main.css
index 97696468e..c795b559b 100644
--- a/files/assets/css/main.css
+++ b/files/assets/css/main.css
@@ -5517,8 +5517,7 @@ input[type=radio] ~ .custom-control-label::before {
height: 150px;
width: 150px;
}
-.emj, .emoji, .bigemoji, .emoji-md, .emoji-lg, img[alt^=":"]
-{
+.emj, .emoji, .bigemoji, .emoji-md, .emoji-lg, img[alt^=":"] {
max-width: 150px !important;
max-height: 150px !important;
max-width: min(150px,25vw) !important;
@@ -5526,6 +5525,9 @@ input[type=radio] ~ .custom-control-label::before {
display: inline-block;
object-fit: contain;
}
+.emoji, .emoji-lg, img[data-kind=Classic] {
+ image-rendering: pixelated;
+}
span[data-bs-toggle], .pat-preview {
position: relative;
diff --git a/files/helpers/sanitize.py b/files/helpers/sanitize.py
index d418b846c..044ecea55 100644
--- a/files/helpers/sanitize.py
+++ b/files/helpers/sanitize.py
@@ -83,6 +83,7 @@ def allowed_attributes(tag, name, value):
if name in {'alt','title'}: return True
if name == 'class' and value == 'img': return True
if name == 'data-user-submitted' and not value: return True
+ if name == 'data-kind' and value in EMOJI_KINDS: return True
if tag == 'lite-youtube':
if name == 'params' and value.startswith('autoplay=1&modestbranding=1'): return True
@@ -338,6 +339,10 @@ def old_render_emoji(html, regexp, golden, emojis_used, b=False, is_title=False)
if(is_loved):
modifier_html = f'{modifier_html}{loved_html}'
+ kind = g.db.query(Emoji.kind).filter(Emoji.name==emoji).one_or_none()
+ if kind:
+ attrs += ' data-kind="' + kind[0].replace('"', '') + '"'
+
if (is_patted and emoji != 'marseyunpettable') or is_talking or is_genocided or is_loved:
if path.isfile(f"files/assets/images/emojis/{emoji}.webp"):
emoji_html = f'{modifier_html}{emoji_partial_pat.format(old, f"{SITE_FULL_IMAGES}/e/{emoji}.webp", attrs)}'
--
2.34.1
From 59d499237d145c7585655981567a0b9b70f0f57d Mon Sep 17 00:00:00 2001
From: transbitch <>
Date: Mon, 18 Sep 2023 01:40:02 -0400
Subject: [PATCH 4/8] transforms are back baby
---
files/helpers/config/const.py | 8 ++--
files/helpers/marseyfx/modifiers.py | 62 ++++++++++++++++++-----------
files/helpers/marseyfx/parser.py | 56 +++++++++++++-------------
files/helpers/marseyfx/tokenizer.py | 16 +++++++-
files/helpers/regex.py | 2 +-
files/helpers/sanitize.py | 6 +--
6 files changed, 90 insertions(+), 60 deletions(-)
diff --git a/files/helpers/config/const.py b/files/helpers/config/const.py
index 83f7b7edb..48ec9caf0 100644
--- a/files/helpers/config/const.py
+++ b/files/helpers/config/const.py
@@ -54,7 +54,8 @@ DELETE_EDIT_RATELIMIT = "10/minute;50/day"
PUSH_NOTIF_LIMIT = 1000
-IS_LOCALHOST = SITE == "localhost" or SITE == "127.0.0.1" or SITE.startswith("192.168.") or SITE.endswith(".local")
+IS_LOCALHOST = SITE.startswith("localhost:") or SITE.startswith("127.0.0.1") or SITE.startswith("192.168.") or SITE.endswith(".local")
+print(f"IS_LOCALHOST: {IS_LOCALHOST}")
if IS_LOCALHOST:
SITE_FULL = 'http://' + SITE
@@ -1073,9 +1074,10 @@ engine = create_engine(environ.get("DATABASE_URL").strip(), connect_args={"optio
db_session = scoped_session(sessionmaker(bind=engine, autoflush=False))
approved_embed_hosts_for_csp = ' '.join(set([x.split('/')[0] for x in approved_embed_hosts]))
-csp = f"default-src 'none'; frame-ancestors 'none'; form-action 'self'; manifest-src 'self'; worker-src 'self'; base-uri 'self'; font-src 'self'; style-src-elem 'self'; style-src-attr 'unsafe-inline'; style-src 'self' 'unsafe-inline'; script-src-elem 'self' challenges.cloudflare.com; script-src-attr 'none'; script-src 'self' challenges.cloudflare.com; frame-src challenges.cloudflare.com www.youtube-nocookie.com platform.twitter.com rumble.com player.twitch.tv; connect-src 'self' videos.watchpeopledie.tv use1.fptls.com use1.fptls3.com api.fpjs.io; img-src {approved_embed_hosts_for_csp} data:; media-src {approved_embed_hosts_for_csp};"
+csp = f"default-src 'none'; frame-ancestors 'none'; form-action 'self'; manifest-src 'self'; worker-src 'self'; base-uri 'self'; font-src 'self'; style-src-elem 'self'; style-src-attr 'unsafe-inline'; style-src 'self' 'unsafe-inline'; script-src-elem 'self' challenges.cloudflare.com; script-src-attr 'none'; script-src 'self' challenges.cloudflare.com; frame-src challenges.cloudflare.com www.youtube-nocookie.com platform.twitter.com rumble.com player.twitch.tv; connect-src 'self' videos.watchpeopledie.tv use1.fptls.com use1.fptls3.com api.fpjs.io; img-src 'self' {approved_embed_hosts_for_csp} data:; media-src 'self' {approved_embed_hosts_for_csp};"
if not IS_LOCALHOST:
csp += ' upgrade-insecure-requests;'
+
with open("includes/content-security-policy", "w") as f:
- f.write(f'add_header Content-Security-Policy "{csp}";')
+ f.write(f'add_header Content-Security-Policy "{csp}";')
\ No newline at end of file
diff --git a/files/helpers/marseyfx/modifiers.py b/files/helpers/marseyfx/modifiers.py
index 37c44f993..e8d0623de 100644
--- a/files/helpers/marseyfx/modifiers.py
+++ b/files/helpers/marseyfx/modifiers.py
@@ -1,15 +1,24 @@
+import re
from bs4 import BeautifulSoup, Tag
from files.helpers.config.const import SITE_FULL_IMAGES
-from files.helpers.marseyfx.parser import Modifier
-from files.helpers.marseyfx.tokenizer import StringLiteralToken
+from files.helpers.marseyfx.tokenizer import StringLiteralToken, Token
modifier_whitelist = []
+class Modifier:
+ name: str
+ args: list[Token]
+
+ def __init__(self, name: str, args: list[Token]):
+ self.name = name
+ self.args = args
+
def modifier(fn):
modifier_whitelist.append(fn.__name__)
def wrapper(*args, **kwargs):
- args[0].el['class'].append('marseyfx-modifier-' + fn.__name__)
+ slf = args[0]
+ slf.el = slf.el.wrap(slf.soup.new_tag('div', attrs={'class': f'marseyfx-modifier marseyfx-modifier-{fn.__name__}'}))
return fn(*args, **kwargs)
return wrapper
@@ -19,7 +28,7 @@ class Modified:
def __init__(self, el):
self.soup = BeautifulSoup()
- self.el = el.wrap(self.soup.new_tag('div', class_='marseyfx-container'))
+ self.el = el
def add_class(self, class_: str):
self.el.attrs['class'].append(' ' + class_)
@@ -32,12 +41,20 @@ class Modified:
# Using this instead of throwing everything in a string and then parsing it helps
# mitigate the risk of XSS attacks
def image(self, name: str):
- return self.soup.new_tag(
+ image = self.soup.new_tag(
'img',
loading='lazy',
- class_=f'marseyfx-{name}',
- src=f'{SITE_FULL_IMAGES}/i/{name}.webp'
- )
+ src=f'{SITE_FULL_IMAGES}/i/{name}.webp',
+ attrs={'class': f'marseyfx-image marseyfx-image-{name}'}
+ )
+
+ container = self.soup.new_tag(
+ 'div',
+ attrs={'class': f'marseyfx-image-container marseyfx-image-container-{name}'}
+ )
+
+ container.append(image)
+ return container
def underlay(self, underlay: Tag):
self.el.insert(0, underlay)
@@ -47,16 +64,16 @@ class Modified:
@modifier
def pat(self):
- self.overlay(self.el, self.image('pat'))
+ self.overlay(self.image('pat'))
@modifier
def love(self):
- self.overlay(self.el, self.image('love-foreground'))
- self.underlay(self.el, self.image('love-background'))
+ self.overlay(self.image('love-foreground'))
+ self.underlay(self.image('love-background'))
@modifier
def talking(self):
- self.overlay(self.el, self.image('talking'))
+ self.overlay(self.image('talking'))
@modifier
def genocide(self):
@@ -67,35 +84,34 @@ class Modified:
if not isinstance(msg, StringLiteralToken):
return
- self.overlay(self.el, self.image('says'))
+ self.overlay(self.image('says'))
self.el.append(self.soup.new_tag(
'span',
- class_='marseyfx-modifier-says-text',
- string=msg.value
+ string=msg.value,
+ attrs={'class': 'marseyfx-modifier-says-text'}
))
@modifier
def fallover(self):
- self.el.wrap(self.soup.new_tag(
+ self.el = self.el.wrap(self.soup.new_tag(
'div',
- class_='marseyfx-modifier-fallover-container'
+ attrs={'class': 'marseyfx-modifier-fallover-container'}
))
@modifier
- def transform(self, transformstyle: str):
- if not transformstyle.fullmatch(r'[\w()\s%\.]*'):
+ def transform(self, transformstyle: StringLiteralToken):
+ if not re.fullmatch(r'[\w()\s%\.,]*', transformstyle.value):
+ print(f'Evil transform detected: {transformstyle.value}')
return
- if not 'style' in self.el.attrs:
- self.el.attrs['style'] = ''
+ self.el.attrs['style'] = f'transform: {transformstyle.value};'
- self.el.attrs['style'] += f'transform: {transformstyle};'
@modifier
def enraged(self):
self.underlay(self.soup.new_tag(
'div',
- class_='marseyfx-enraged-underlay'
+ attrs={'class': 'marseyfx-enraged-underlay'}
))
@modifier
diff --git a/files/helpers/marseyfx/parser.py b/files/helpers/marseyfx/parser.py
index 04f936da2..b567fb44c 100644
--- a/files/helpers/marseyfx/parser.py
+++ b/files/helpers/marseyfx/parser.py
@@ -3,15 +3,7 @@ from tokenize import Token
from bs4 import BeautifulSoup
from files.helpers.config.const import SITE_FULL_IMAGES
from files.helpers.marseyfx.tokenizer import ArgsToken, DotToken, GroupToken, Tokenizer, WordToken
-from modified import Modified
-
-class Modifier:
- name: str
- args: list[Token]
-
- def __init__(self, name: str, args: list[Token]):
- self.name = name
- self.args = args
+from files.helpers.marseyfx.modifiers import Modified, Modifier
emoji_replacers = {
'!': 'is_flipped',
@@ -29,41 +21,49 @@ class Emoji:
def __init__(self, name: str, modifiers, token: Token):
for symbol, value in emoji_replacers.items():
- name = name.replace(symbol, '')
- setattr(self, value, True)
+ if symbol in name:
+ name = name.replace(symbol, '')
+ setattr(self, value, True)
self.name = name
self.modifiers = modifiers
self.token = token
+
def create_el(self):
soup = BeautifulSoup()
el = soup.new_tag(
'img',
loading='lazy',
- class_='marseyfx-emoji',
- src=f'{SITE_FULL_IMAGES}/e/{self.name}.webp'
+ src=f'{SITE_FULL_IMAGES}/e/{self.name}.webp',
+ attrs={'class': f'marseyfx-emoji marseyfx-image'}
+ )
+ soup.append(el)
+ el = el.wrap(
+ soup.new_tag('div', attrs={'class': 'marseyfx-emoji-container'})
)
-
- if (self.is_big):
- el['class'].append(' marseyfx-big')
-
- if (self.is_flipped):
- el['class'].append(' marseyfx-flipped')
mod = Modified(el)
mod.apply_modifiers(self.modifiers)
- return mod.el
+ container = soup.new_tag('div', attrs={'class': 'marseyfx-container'})
+ if (self.is_big):
+ container['class'].append(' marseyfx-big')
+
+ if (self.is_flipped):
+ container['class'].append(' marseyfx-flipped')
+
+ return mod.el.wrap(container)
def parse_emoji(str: str):
tokenizer = Tokenizer(str)
token = tokenizer.parse_next_tokens()
- if len(tokenizer.errors) > 0:
+ if len(tokenizer.errors) > 0 or token is None:
return False, None, token
emoji = parse_from_token(tokenizer, token)
+ print(f'Here! {emoji}')
if not emoji:
return False, None, token
@@ -75,7 +75,7 @@ def parse_from_token(tokenizer: Tokenizer, token: GroupToken):
tokenizer.error('Malformed token -- Expected a group token')
return
- emoji = token.tokens[0]
+ emoji = token.children[0]
if not isinstance(emoji, WordToken):
tokenizer.error('Malformed token -- Expected an emoji (word token)')
@@ -84,24 +84,24 @@ def parse_from_token(tokenizer: Tokenizer, token: GroupToken):
modifiers = []
i = 1
- while i + 1 < len(token.tokens):
- t = token.tokens[i]
+ while i + 1 < len(token.children):
+ t = token.children[i]
if not isinstance(t, DotToken):
tokenizer.error('Malformed token -- Expected a dot')
return
- modifier = token.tokens[i + 1]
+ modifier = token.children[i + 1]
if not isinstance(modifier, WordToken):
tokenizer.error('Malformed token -- Expected a modifier name (word token)')
return
- if not i + 2 < len(token.tokens) or not isinstance(token.tokens[i + 2], ArgsToken):
+ if not i + 2 < len(token.children) or not isinstance(token.children[i + 2], ArgsToken):
modifiers.append(Modifier(modifier.value, []))
i += 2
else:
- args = token.tokens[i + 2]
- modifiers.append(Modifier(modifier.value, args.tokens))
+ args = token.children[i + 2]
+ modifiers.append(Modifier(modifier.value, args.children))
i += 3
return Emoji(emoji.value, modifiers, token)
\ No newline at end of file
diff --git a/files/helpers/marseyfx/tokenizer.py b/files/helpers/marseyfx/tokenizer.py
index 6a01129f9..82e859b25 100644
--- a/files/helpers/marseyfx/tokenizer.py
+++ b/files/helpers/marseyfx/tokenizer.py
@@ -1,4 +1,5 @@
from abc import abstractmethod
+import re
class TokenizerError:
index: int
@@ -39,6 +40,7 @@ class Tokenizer:
return self.str[token.span[0]:token.span[1]]
def parse_next_tokens(self):
+ print(self.str[self.index:])
start = self.index
tokens = []
while self.has_next():
@@ -48,9 +50,18 @@ class Tokenizer:
tokens.append(DotToken.parse(self))
elif ArgsToken.can_parse(self):
tokens.append(ArgsToken.parse(self))
+ elif StringLiteralToken.can_parse(self):
+ tokens.append(StringLiteralToken.parse(self))
else:
break
+ if len(tokens) == 0:
+ self.error('Expected a token')
+ return None
+
+ if len(tokens) == 1:
+ return tokens[0]
+
return GroupToken((start, self.index), tokens)
class Token:
@@ -75,7 +86,7 @@ class WordToken(Token):
@staticmethod
def can_parse(tokenizer: Tokenizer):
- return tokenizer.peek().fullmatch(r'[!#\w@]')
+ return re.fullmatch(r'[!#\w@]', tokenizer.peek())
@staticmethod
def parse(tokenizer: Tokenizer):
@@ -129,7 +140,7 @@ class NumberLiteralToken(Token):
@staticmethod
def can_parse(tokenizer: Tokenizer):
- return tokenizer.peek().fullmatch(r'[-\d\.]')
+ return re.fullmatch(r'[-\d\.]', tokenizer.peek())
@staticmethod
def parse(tokenizer: Tokenizer):
@@ -193,6 +204,7 @@ class ArgsToken(Token):
elif tokenizer.peek() == ',':
tokenizer.eat()
else:
+ tokenizer.eat()
tokens.append(tokenizer.parse_next_tokens())
return ArgsToken((start, tokenizer.index), tokens)
\ No newline at end of file
diff --git a/files/helpers/regex.py b/files/helpers/regex.py
index d0b0f16ea..f754ce7b9 100644
--- a/files/helpers/regex.py
+++ b/files/helpers/regex.py
@@ -52,7 +52,7 @@ mute_regex = re.compile("\/mute @?([\w-]{1,30}) ([0-9]+)", flags=re.A|re.I)
emoji_regex = re.compile(f"\s*(:[!#@\w\-]{{1,72}}:\s*)+<\/p>", flags=re.A)
emoji_regex2 = re.compile(f'(?|[^`]*`))', flags=re.A)
-marseyfx_emoji_regex = re.compile(':[^\s].{0,98}?[^\\]:', flags=re.A)
+marseyfx_emoji_regex = re.compile(':[^\s].{0,98}?[^\\\\]:', flags=re.A)
snappy_url_regex = re.compile('(.+?)<\/a>', flags=re.A)
diff --git a/files/helpers/sanitize.py b/files/helpers/sanitize.py
index 044ecea55..eb2b749b3 100644
--- a/files/helpers/sanitize.py
+++ b/files/helpers/sanitize.py
@@ -279,7 +279,7 @@ def render_emojis(markup: str):
for emoji_match in marseyfx_emoji_regex.finditer(markup):
emoji_str = emoji_match.group()[1:-1] # Cut off colons
- success, emoji = parse_emoji(emoji_str)
+ success, emoji, _ = parse_emoji(emoji_str)
if success:
emojis_used.add(emoji.name)
emoji_html = str(emoji.create_el())
@@ -549,8 +549,6 @@ def sanitize(sanitized, golden=True, limit_pings=0, showmore=False, count_emojis
sanitized = spoiler_regex.sub(r'\1', sanitized)
- santiized, emojis_used = render_emojis(sanitized)
-
sanitized = sanitized.replace('&','&')
sanitized = video_sub_regex.sub(r'', sanitized)
@@ -576,6 +574,8 @@ def sanitize(sanitized, golden=True, limit_pings=0, showmore=False, count_emojis
parse_email=False, url_re=url_re)]
).clean(sanitized)
+ sanitized, emojis_used = render_emojis(sanitized)
+
#doing this here cuz of the linkifyfilter right above it (therefore unifying all link processing logic)
soup = BeautifulSoup(sanitized, 'lxml')
--
2.34.1
From 33163d2e5e1ab7a4eb7e7efb4cc0da039c02c304 Mon Sep 17 00:00:00 2001
From: transbitch <>
Date: Mon, 18 Sep 2023 11:05:28 -0400
Subject: [PATCH 5/8] some updates xd
---
files/helpers/marseyfx/modifiers.py | 100 ++++++++++++++++++++++++----
files/helpers/marseyfx/parser.py | 4 +-
files/helpers/marseyfx/tokenizer.py | 12 ++--
3 files changed, 93 insertions(+), 23 deletions(-)
diff --git a/files/helpers/marseyfx/modifiers.py b/files/helpers/marseyfx/modifiers.py
index e8d0623de..bb3412f51 100644
--- a/files/helpers/marseyfx/modifiers.py
+++ b/files/helpers/marseyfx/modifiers.py
@@ -1,7 +1,8 @@
import re
from bs4 import BeautifulSoup, Tag
from files.helpers.config.const import SITE_FULL_IMAGES
-from files.helpers.marseyfx.tokenizer import StringLiteralToken, Token
+from files.helpers.marseyfx.tokenizer import GroupToken, NumberLiteralToken, StringLiteralToken, Token, Tokenizer
+import files.helpers.marseyfx.parser as parser
modifier_whitelist = []
@@ -18,20 +19,41 @@ def modifier(fn):
def wrapper(*args, **kwargs):
slf = args[0]
- slf.el = slf.el.wrap(slf.soup.new_tag('div', attrs={'class': f'marseyfx-modifier marseyfx-modifier-{fn.__name__}'}))
+ slf.child = slf.container
+ slf.container = slf.child.wrap(slf.soup.new_tag('div', attrs={'class': f'marseyfx-modifier marseyfx-modifier-{fn.__name__}'}))
+ return fn(*args, **kwargs)
+ return wrapper
+
+def heavy(fn):
+ def wrapper(*args, **kwargs):
+ slf = args[0]
+ slf.heavy_count += 1
return fn(*args, **kwargs)
return wrapper
class Modified:
soup: BeautifulSoup
- el: Tag #BeautifulSoup element
+ container: Tag
+ child: Tag
+ tokenizer: Tokenizer
+ heavy_count = 0
- def __init__(self, el):
+ def __init__(self, el, tokenizer):
self.soup = BeautifulSoup()
- self.el = el
+ self.container = el
+ self.tokenizer = tokenizer
def add_class(self, class_: str):
- self.el.attrs['class'].append(' ' + class_)
+ if not 'class' in self.container.attrs:
+ self.container.attrs['class'] = ''
+ else:
+ self.container.attrs['class'].append(' ' + class_)
+
+ def add_child_class(self, class_: str):
+ if not 'class' in self.child.attrs:
+ self.child.attrs['class'] = ''
+ else:
+ self.child.attrs['class'].append(' ' + class_)
def apply_modifiers(self, modifiers: list[Modifier]):
for modifier in modifiers:
@@ -40,7 +62,7 @@ class Modified:
# Using this instead of throwing everything in a string and then parsing it helps
# mitigate the risk of XSS attacks
- def image(self, name: str):
+ def image_href(self, name: str):
image = self.soup.new_tag(
'img',
loading='lazy',
@@ -57,10 +79,16 @@ class Modified:
return container
def underlay(self, underlay: Tag):
- self.el.insert(0, underlay)
+ self.container.insert(0, underlay)
def overlay(self, overlay: Tag):
- self.el.append(overlay)
+ self.container.append(overlay)
+
+ def add_style(self, style: str):
+ if 'style' in self.container.attrs:
+ style = self.container.attrs['style'] + style
+
+ self.container.attrs['style'] = style
@modifier
def pat(self):
@@ -85,7 +113,7 @@ class Modified:
return
self.overlay(self.image('says'))
- self.el.append(self.soup.new_tag(
+ self.container.append(self.soup.new_tag(
'span',
string=msg.value,
attrs={'class': 'marseyfx-modifier-says-text'}
@@ -93,7 +121,7 @@ class Modified:
@modifier
def fallover(self):
- self.el = self.el.wrap(self.soup.new_tag(
+ self.container = self.container.wrap(self.soup.new_tag(
'div',
attrs={'class': 'marseyfx-modifier-fallover-container'}
))
@@ -104,9 +132,9 @@ class Modified:
print(f'Evil transform detected: {transformstyle.value}')
return
- self.el.attrs['style'] = f'transform: {transformstyle.value};'
-
+ self.add_style(f'transform: {transformstyle.value};')
+ @heavy
@modifier
def enraged(self):
self.underlay(self.soup.new_tag(
@@ -114,10 +142,54 @@ class Modified:
attrs={'class': 'marseyfx-enraged-underlay'}
))
+ @heavy
@modifier
def corrupted(self):
pass
+ @heavy
@modifier
def wavy(self):
- self.el.wrap(self.soup.new_tag('svg'))
\ No newline at end of file
+ self.container.wrap(self.soup.new_tag('svg'))
+
+ @modifier
+ def toptext(self, text: StringLiteralToken):
+ if not isinstance(text, StringLiteralToken):
+ return
+
+ self.overlay(self.soup.new_tag(
+ 'span',
+ string=text.value,
+ attrs={'class': 'marseyfx-modifier-toptext-text'}
+ ))
+
+ @modifier
+ def bottomtext(self, text: StringLiteralToken):
+ if not isinstance(text, StringLiteralToken):
+ return
+
+ self.overlay(self.soup.new_tag(
+ 'span',
+ string=text.value,
+ attrs={'class': 'marseyfx-modifier-bottomtext-text'}
+ ))
+
+ @modifier
+ def spin(self, speed: NumberLiteralToken):
+ self.add_style('--marseyfx-spin-speed: ' + speed.value + ';')
+
+ @modifier
+ def triumphs(self, other: GroupToken):
+ other_emoji = parser.parse_from_token(self.tokenizer, other)
+
+ if other_emoji is None:
+ return
+
+ self.add_child_class('marseyfx-modifier-triumphs-self')
+
+ other = other_emoji.create_el().wrap(
+ self.soup.new_tag('div', attrs={'class': 'marseyfx-modifier-triumphs-other'})
+ )
+ self.underlay(other)
+
+
\ No newline at end of file
diff --git a/files/helpers/marseyfx/parser.py b/files/helpers/marseyfx/parser.py
index b567fb44c..985d612e5 100644
--- a/files/helpers/marseyfx/parser.py
+++ b/files/helpers/marseyfx/parser.py
@@ -53,7 +53,7 @@ class Emoji:
if (self.is_flipped):
container['class'].append(' marseyfx-flipped')
- return mod.el.wrap(container)
+ return mod.container.wrap(container)
def parse_emoji(str: str):
tokenizer = Tokenizer(str)
@@ -101,7 +101,7 @@ def parse_from_token(tokenizer: Tokenizer, token: GroupToken):
i += 2
else:
args = token.children[i + 2]
- modifiers.append(Modifier(modifier.value, args.children))
+ modifiers.append(Modifier(modifier.value, *args.children))
i += 3
return Emoji(emoji.value, modifiers, token)
\ No newline at end of file
diff --git a/files/helpers/marseyfx/tokenizer.py b/files/helpers/marseyfx/tokenizer.py
index 82e859b25..bd3db22c3 100644
--- a/files/helpers/marseyfx/tokenizer.py
+++ b/files/helpers/marseyfx/tokenizer.py
@@ -40,11 +40,12 @@ class Tokenizer:
return self.str[token.span[0]:token.span[1]]
def parse_next_tokens(self):
- print(self.str[self.index:])
start = self.index
tokens = []
while self.has_next():
- if WordToken.can_parse(self):
+ if NumberLiteralToken.can_parse(self):
+ tokens.append(NumberLiteralToken.parse(self))
+ elif WordToken.can_parse(self):
tokens.append(WordToken.parse(self))
elif DotToken.can_parse(self):
tokens.append(DotToken.parse(self))
@@ -58,9 +59,6 @@ class Tokenizer:
if len(tokens) == 0:
self.error('Expected a token')
return None
-
- if len(tokens) == 1:
- return tokens[0]
return GroupToken((start, self.index), tokens)
@@ -86,7 +84,7 @@ class WordToken(Token):
@staticmethod
def can_parse(tokenizer: Tokenizer):
- return re.fullmatch(r'[!#\w@]', tokenizer.peek())
+ return re.fullmatch(r'[!#@a-zA-Z]', tokenizer.peek())
@staticmethod
def parse(tokenizer: Tokenizer):
@@ -205,6 +203,6 @@ class ArgsToken(Token):
tokenizer.eat()
else:
tokenizer.eat()
- tokens.append(tokenizer.parse_next_tokens())
+ tokens.extend(tokenizer.parse_next_tokens())
return ArgsToken((start, tokenizer.index), tokens)
\ No newline at end of file
--
2.34.1
From 897ee3e1cd224307fbaa7d8a98a4042b57672654 Mon Sep 17 00:00:00 2001
From: transbitch <>
Date: Mon, 18 Sep 2023 22:27:27 -0400
Subject: [PATCH 6/8] small changes uwu
---
files/helpers/marseyfx/modifiers.py | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/files/helpers/marseyfx/modifiers.py b/files/helpers/marseyfx/modifiers.py
index bb3412f51..68b3e8d9b 100644
--- a/files/helpers/marseyfx/modifiers.py
+++ b/files/helpers/marseyfx/modifiers.py
@@ -62,7 +62,7 @@ class Modified:
# Using this instead of throwing everything in a string and then parsing it helps
# mitigate the risk of XSS attacks
- def image_href(self, name: str):
+ def image(self, name: str):
image = self.soup.new_tag(
'img',
loading='lazy',
@@ -139,12 +139,12 @@ class Modified:
def enraged(self):
self.underlay(self.soup.new_tag(
'div',
- attrs={'class': 'marseyfx-enraged-underlay'}
+ attrs={'class': 'marseyfx-modifier-enraged-underlay'}
))
@heavy
@modifier
- def corrupted(self):
+ def highcontrast(self):
pass
@heavy
@@ -176,7 +176,7 @@ class Modified:
@modifier
def spin(self, speed: NumberLiteralToken):
- self.add_style('--marseyfx-spin-speed: ' + speed.value + ';')
+ self.add_style('--marseyfx-spin-peroid-multiplier: ' + (1/speed.value) + ';')
@modifier
def triumphs(self, other: GroupToken):
--
2.34.1
From 22c9dd19080266c0c505ac5ebdd7f77751f8eb38 Mon Sep 17 00:00:00 2001
From: transbitch <>
Date: Sun, 24 Sep 2023 02:02:53 -0400
Subject: [PATCH 7/8] Update MarseyFX
---
.gitignore | 4 +
files/assets/images/prohibition.svg | 57 +++++
files/assets/images/scope.svg | 347 ++++++++++++++++++++++++++++
files/helpers/marseyfx/modifiers.py | 192 ++++++++++++---
files/helpers/marseyfx/parser.py | 49 ++--
files/helpers/marseyfx/tokenizer.py | 40 +++-
files/helpers/regex.py | 2 +-
files/helpers/sanitize.py | 74 ++++--
8 files changed, 687 insertions(+), 78 deletions(-)
create mode 100644 files/assets/images/prohibition.svg
create mode 100644 files/assets/images/scope.svg
diff --git a/.gitignore b/.gitignore
index 97a56b27c..4cdeeafe9 100644
--- a/.gitignore
+++ b/.gitignore
@@ -7,3 +7,7 @@ __pycache__/
emojis.zip
emojis_original.zip
includes/content-security-policy
+includes/headers
+nginx.conf
+.gitignore
+docker-compose.yml
\ No newline at end of file
diff --git a/files/assets/images/prohibition.svg b/files/assets/images/prohibition.svg
new file mode 100644
index 000000000..a9e1c44af
--- /dev/null
+++ b/files/assets/images/prohibition.svg
@@ -0,0 +1,57 @@
+
+
diff --git a/files/assets/images/scope.svg b/files/assets/images/scope.svg
new file mode 100644
index 000000000..35636b9f0
--- /dev/null
+++ b/files/assets/images/scope.svg
@@ -0,0 +1,347 @@
+
+
+
+
diff --git a/files/helpers/marseyfx/modifiers.py b/files/helpers/marseyfx/modifiers.py
index 68b3e8d9b..679b86ef0 100644
--- a/files/helpers/marseyfx/modifiers.py
+++ b/files/helpers/marseyfx/modifiers.py
@@ -1,4 +1,6 @@
+import copy
import re
+from typing import Optional
from bs4 import BeautifulSoup, Tag
from files.helpers.config.const import SITE_FULL_IMAGES
from files.helpers.marseyfx.tokenizer import GroupToken, NumberLiteralToken, StringLiteralToken, Token, Tokenizer
@@ -19,9 +21,14 @@ def modifier(fn):
def wrapper(*args, **kwargs):
slf = args[0]
+ ctx = ModifierContextFrame(fn.__name__)
+ slf.context_frames.insert(0, ctx)
slf.child = slf.container
- slf.container = slf.child.wrap(slf.soup.new_tag('div', attrs={'class': f'marseyfx-modifier marseyfx-modifier-{fn.__name__}'}))
- return fn(*args, **kwargs)
+ slf.container = slf.child.wrap(slf.soup.new_tag('div', attrs={'class': f'marseyfx-modifier marseyfx-modifier-{ctx.name}'}))
+ slf.add_child_class(f'marseyfx-modifier-{ctx.name}-self')
+ res = fn(*args, **kwargs)
+ slf.context_frames.pop(0)
+ return res
return wrapper
def heavy(fn):
@@ -31,17 +38,27 @@ def heavy(fn):
return fn(*args, **kwargs)
return wrapper
+class ModifierContextFrame:
+ name: str
+ def __init__(self, name: str):
+ self.name = name
+
class Modified:
soup: BeautifulSoup
container: Tag
child: Tag
tokenizer: Tokenizer
heavy_count = 0
+ context_frames: list[ModifierContextFrame]
def __init__(self, el, tokenizer):
self.soup = BeautifulSoup()
self.container = el
self.tokenizer = tokenizer
+ self.context_frames = []
+
+ def ctx(self):
+ return self.context_frames[0] if len(self.context_frames) > 0 else None
def add_class(self, class_: str):
if not 'class' in self.container.attrs:
@@ -58,15 +75,21 @@ class Modified:
def apply_modifiers(self, modifiers: list[Modifier]):
for modifier in modifiers:
if modifier.name in modifier_whitelist:
- getattr(self, modifier.name)(*modifier.args)
+ getattr(self, modifier.name)(*map(GroupToken.unwrap, modifier.args))
# Using this instead of throwing everything in a string and then parsing it helps
# mitigate the risk of XSS attacks
def image(self, name: str):
+
+ filename = name
+
+ if not '.' in filename:
+ filename += '.webp'
+
image = self.soup.new_tag(
'img',
loading='lazy',
- src=f'{SITE_FULL_IMAGES}/i/{name}.webp',
+ src=f'{SITE_FULL_IMAGES}/i/{filename}',
attrs={'class': f'marseyfx-image marseyfx-image-{name}'}
)
@@ -90,9 +113,39 @@ class Modified:
self.container.attrs['style'] = style
+ def meme_text(self, text: str, class_: Optional[str] = None):
+ attrs = {}
+ if class_ is not None:
+ attrs = {'class': f'marseyfx-memetext-{class_}'}
+
+ tag = self.soup.new_tag(
+ 'span',
+ attrs=attrs
+ )
+
+ tag.string = text
+
+ self.overlay(tag)
+
+ def create_other(self, other: GroupToken = None):
+ wrapper = self.soup.new_tag('div', attrs={'class': f'marseyfx-modifier-{self.ctx().name}-other'})
+
+ if other is None:
+ return wrapper
+
+ other = other.wrap()
+ other_emoji = parser.parse_from_token(self.tokenizer, other)
+
+ if other_emoji is None:
+ return wrapper
+
+ other_emoji.is_primary = False
+
+ return other_emoji.create_el(self.tokenizer).wrap(wrapper)
+
@modifier
def pat(self):
- self.overlay(self.image('pat'))
+ self.overlay(self.image('hand'))
@modifier
def love(self):
@@ -107,18 +160,33 @@ class Modified:
def genocide(self):
pass
+ @modifier
+ def party(self):
+ pass
+
@modifier
def says(self, msg):
if not isinstance(msg, StringLiteralToken):
return
- self.overlay(self.image('says'))
- self.container.append(self.soup.new_tag(
- 'span',
- string=msg.value,
- attrs={'class': 'marseyfx-modifier-says-text'}
+ container = self.soup.new_tag(
+ 'div',
+ attrs={'class': 'marseyfx-modifier-says-container'}
+ )
+ self.container.append(container)
+
+ container.append(self.soup.new_tag(
+ 'div',
+ attrs={'class': 'marseyfx-modifier-says-nub'}
))
+ tag = self.soup.new_tag(
+ 'span',
+ attrs={'class': 'marseyfx-modifier-says-text'}
+ )
+ tag.string = msg.value
+ container.append(tag)
+
@modifier
def fallover(self):
self.container = self.container.wrap(self.soup.new_tag(
@@ -142,54 +210,106 @@ class Modified:
attrs={'class': 'marseyfx-modifier-enraged-underlay'}
))
- @heavy
@modifier
- def highcontrast(self):
- pass
+ def meme(self, toptext: Optional[StringLiteralToken] = None, bottomtext: Optional[StringLiteralToken] = None):
+ if isinstance(toptext, StringLiteralToken):
+ self.meme_text(toptext.value, 'toptext')
- @heavy
- @modifier
- def wavy(self):
- self.container.wrap(self.soup.new_tag('svg'))
+ if isinstance(bottomtext, StringLiteralToken):
+ self.meme_text(bottomtext.value, 'bottomtext')
- @modifier
- def toptext(self, text: StringLiteralToken):
- if not isinstance(text, StringLiteralToken):
- return
-
- self.overlay(self.soup.new_tag(
- 'span',
- string=text.value,
- attrs={'class': 'marseyfx-modifier-toptext-text'}
- ))
-
- @modifier
def bottomtext(self, text: StringLiteralToken):
if not isinstance(text, StringLiteralToken):
return
- self.overlay(self.soup.new_tag(
+ tag = self.soup.new_tag(
'span',
- string=text.value,
attrs={'class': 'marseyfx-modifier-bottomtext-text'}
- ))
+ )
+
+ tag.string = text.value
+
+ self.overlay(tag)
@modifier
- def spin(self, speed: NumberLiteralToken):
- self.add_style('--marseyfx-spin-peroid-multiplier: ' + (1/speed.value) + ';')
+ def spin(self, speed=None):
+ if not isinstance(speed, NumberLiteralToken):
+ return
+
+ self.add_style(f'animation-duration: {1/speed.value}s;')
@modifier
def triumphs(self, other: GroupToken):
+ other = other.wrap()
other_emoji = parser.parse_from_token(self.tokenizer, other)
+ print(f'Other emoji: {other_emoji} / Token: {other}')
if other_emoji is None:
return
self.add_child_class('marseyfx-modifier-triumphs-self')
- other = other_emoji.create_el().wrap(
+ other_emoji.is_primary = False
+
+ other = other_emoji.create_el(self.tokenizer).wrap(
self.soup.new_tag('div', attrs={'class': 'marseyfx-modifier-triumphs-other'})
)
self.underlay(other)
-
\ No newline at end of file
+ @modifier
+ def nested(self, inside: GroupToken):
+ inside = inside.wrap()
+ inside_emoji = parser.parse_from_token(self.tokenizer, inside)
+
+ if inside_emoji is None:
+ return
+
+ inside_emoji.is_primary = False
+
+ inside = inside_emoji.create_el(self.tokenizer).wrap(
+ self.soup.new_tag('div', attrs={'class': 'marseyfx-modifier-nested-other'})
+ )
+
+ self.underlay(inside)
+
+ self.add_child_class('marseyfx-modifier-nested-side')
+ child = self.child
+ self.child = child.wrap(self.soup.new_tag('div', attrs={'class': 'marseyfx-modifier-nested-outer-container'}))
+ other_side = copy.copy(child)
+ self.child.append(other_side)
+
+ @modifier
+ def morph(self, other: GroupToken):
+ self.add_child_class('marseyfx-modifier-morph-self')
+
+ other = other.wrap()
+ other_emoji = parser.parse_from_token(self.tokenizer, other)
+
+ if other_emoji is None:
+ return
+
+ other_emoji.is_primary = False
+ other = other_emoji.create_el(self.tokenizer).wrap(
+ self.soup.new_tag('div', attrs={'class': 'marseyfx-modifier-morph-other'})
+ )
+
+ self.container.append(other)
+
+ @heavy
+ @modifier
+ def bulge(self, strength: NumberLiteralToken = None):
+ self.child = self.child.wrap(self.soup.new_tag('svg', attrs={'class': 'marseyfx-modifier-bulge-container'}))
+
+ @modifier
+ def prohibition(self):
+ self.overlay(self.image('prohibition.svg'))
+
+ @modifier
+ def snipe(self):
+ self.overlay(self.image('scope.svg'))
+ self.add_child_class('marseyfx-modifier-snipe-target')
+
+ @modifier
+ def fucks(self, other: GroupToken):
+ other = self.create_other(other)
+ self.container.append(other)
\ No newline at end of file
diff --git a/files/helpers/marseyfx/parser.py b/files/helpers/marseyfx/parser.py
index 985d612e5..bfd7c0530 100644
--- a/files/helpers/marseyfx/parser.py
+++ b/files/helpers/marseyfx/parser.py
@@ -2,8 +2,8 @@ from tokenize import Token
from bs4 import BeautifulSoup
from files.helpers.config.const import SITE_FULL_IMAGES
-from files.helpers.marseyfx.tokenizer import ArgsToken, DotToken, GroupToken, Tokenizer, WordToken
-from files.helpers.marseyfx.modifiers import Modified, Modifier
+from files.helpers.marseyfx.tokenizer import ArgsToken, DotToken, GroupToken, NumberLiteralToken, Tokenizer, WordToken
+from files.helpers.marseyfx.modifiers import Modified, Modifier, modifier_whitelist
emoji_replacers = {
'!': 'is_flipped',
@@ -18,8 +18,9 @@ class Emoji:
is_flipped = False
is_user = False
modifiers: list[Modifier]
+ is_primary = True
- def __init__(self, name: str, modifiers, token: Token):
+ def __init__(self, name: str, modifiers, token: Token, **args):
for symbol, value in emoji_replacers.items():
if symbol in name:
name = name.replace(symbol, '')
@@ -28,25 +29,42 @@ class Emoji:
self.name = name
self.modifiers = modifiers
self.token = token
+ self.is_primary = args.get('is_primary', True)
- def create_el(self):
+ def create_el(self, tokenizer: Tokenizer):
soup = BeautifulSoup()
el = soup.new_tag(
'img',
loading='lazy',
src=f'{SITE_FULL_IMAGES}/e/{self.name}.webp',
- attrs={'class': f'marseyfx-emoji marseyfx-image'}
+ attrs={
+ 'class': f'marseyfx-emoji marseyfx-image',
+ }
)
soup.append(el)
el = el.wrap(
- soup.new_tag('div', attrs={'class': 'marseyfx-emoji-container'})
+ soup.new_tag('div', attrs={
+ 'class': 'marseyfx-emoji-container'
+ })
)
- mod = Modified(el)
+ mod = Modified(el, tokenizer)
mod.apply_modifiers(self.modifiers)
- container = soup.new_tag('div', attrs={'class': 'marseyfx-container'})
+
+ container_attrs = {
+ 'class': 'marseyfx-container',
+ }
+
+ if self.is_primary:
+ container_attrs |= {
+ 'data-bs-toggle': 'tooltip',
+ 'title': tokenizer.str
+ }
+
+ container = soup.new_tag('div', attrs=container_attrs)
+
if (self.is_big):
container['class'].append(' marseyfx-big')
@@ -55,8 +73,7 @@ class Emoji:
return mod.container.wrap(container)
-def parse_emoji(str: str):
- tokenizer = Tokenizer(str)
+def parse_emoji(tokenizer: Tokenizer):
token = tokenizer.parse_next_tokens()
if len(tokenizer.errors) > 0 or token is None:
@@ -77,8 +94,8 @@ def parse_from_token(tokenizer: Tokenizer, token: GroupToken):
emoji = token.children[0]
- if not isinstance(emoji, WordToken):
- tokenizer.error('Malformed token -- Expected an emoji (word token)')
+ if not isinstance(emoji, WordToken) and not isinstance(emoji, NumberLiteralToken):
+ tokenizer.error('Malformed token -- Expected an emoji (word token) or number literal token')
return
modifiers = []
@@ -96,12 +113,16 @@ def parse_from_token(tokenizer: Tokenizer, token: GroupToken):
tokenizer.error('Malformed token -- Expected a modifier name (word token)')
return
+ if not modifier.value in modifier_whitelist:
+ tokenizer.error(f'Unknown modifier: {modifier.value}')
+ return
+
if not i + 2 < len(token.children) or not isinstance(token.children[i + 2], ArgsToken):
modifiers.append(Modifier(modifier.value, []))
i += 2
else:
args = token.children[i + 2]
- modifiers.append(Modifier(modifier.value, *args.children))
+ modifiers.append(Modifier(modifier.value, args.children))
i += 3
- return Emoji(emoji.value, modifiers, token)
\ No newline at end of file
+ return Emoji(tokenizer.str[emoji.span[0]:emoji.span[1]], modifiers, token)
\ No newline at end of file
diff --git a/files/helpers/marseyfx/tokenizer.py b/files/helpers/marseyfx/tokenizer.py
index bd3db22c3..7d44b84bc 100644
--- a/files/helpers/marseyfx/tokenizer.py
+++ b/files/helpers/marseyfx/tokenizer.py
@@ -5,10 +5,14 @@ class TokenizerError:
index: int
error: str
- def __init__(self, index: int, error: str):
+ def __init__(self, tokenizer, index: int, error: str):
+ self.tokenizer = tokenizer
self.index = index
self.error = error
+ def __str__(self):
+ return f'{self.error}\n {self.tokenizer.str}\n {" " * self.index}^'
+
class Tokenizer:
str: str
index: int
@@ -23,6 +27,9 @@ class Tokenizer:
return self.index < len(self.str)
def peek(self):
+ if not self.has_next():
+ self.error('Unexpected end of input')
+ return None
return self.str[self.index]
def eat(self):
@@ -34,7 +41,7 @@ class Tokenizer:
self.index -= 1
def error(self, error: str):
- self.errors.append(TokenizerError(self.index, error))
+ self.errors.append(TokenizerError(self, self.index, error))
def token_to_string(self, token):
return self.str[token.span[0]:token.span[1]]
@@ -43,7 +50,9 @@ class Tokenizer:
start = self.index
tokens = []
while self.has_next():
- if NumberLiteralToken.can_parse(self):
+ if self.peek() == ' ':
+ self.eat()
+ elif NumberLiteralToken.can_parse(self):
tokens.append(NumberLiteralToken.parse(self))
elif WordToken.can_parse(self):
tokens.append(WordToken.parse(self))
@@ -65,6 +74,12 @@ class Tokenizer:
class Token:
span: tuple[int, int]
+ def wrap(self):
+ if isinstance(self, GroupToken):
+ return self
+ else:
+ return GroupToken(self.span, [self])
+
@staticmethod
@abstractmethod
def can_parse(tokenizer: Tokenizer) -> bool:
@@ -138,6 +153,10 @@ class NumberLiteralToken(Token):
@staticmethod
def can_parse(tokenizer: Tokenizer):
+ return re.fullmatch(r'[-\d]', tokenizer.peek())
+
+ @staticmethod
+ def can_parse_next(tokenizer: Tokenizer):
return re.fullmatch(r'[-\d\.]', tokenizer.peek())
@staticmethod
@@ -145,7 +164,7 @@ class NumberLiteralToken(Token):
start = tokenizer.index
value = ''
while tokenizer.has_next():
- if NumberLiteralToken.can_parse(tokenizer):
+ if NumberLiteralToken.can_parse_next(tokenizer):
value += tokenizer.eat()
else:
break
@@ -179,8 +198,16 @@ class GroupToken(Token):
def __init__(self, span: tuple[int, int], children: list[Token]):
self.children = children
+
+ # NOTE(review): this span is probably inaccurate (known limitation, not yet fixed) -- verify before relying on it
self.span = span
+ def unwrap(self):
+ if len(self.children) == 1:
+ return self.children[0]
+ else:
+ return self
+
class ArgsToken(Token):
children: list[GroupToken]
def __init__(self, span: tuple[int, int], children: list[Token]):
@@ -196,13 +223,14 @@ class ArgsToken(Token):
start = tokenizer.index
tokens = []
while tokenizer.has_next():
+ if tokenizer.peek() == '(':
+ tokenizer.eat()
if tokenizer.peek() == ')':
tokenizer.eat()
break
elif tokenizer.peek() == ',':
tokenizer.eat()
else:
- tokenizer.eat()
- tokens.extend(tokenizer.parse_next_tokens())
+ tokens.append(tokenizer.parse_next_tokens())
return ArgsToken((start, tokenizer.index), tokens)
\ No newline at end of file
diff --git a/files/helpers/regex.py b/files/helpers/regex.py
index f754ce7b9..e71e4e9bf 100644
--- a/files/helpers/regex.py
+++ b/files/helpers/regex.py
@@ -52,7 +52,7 @@ mute_regex = re.compile("\/mute @?([\w-]{1,30}) ([0-9]+)", flags=re.A|re.I)
emoji_regex = re.compile(f"\s*(:[!#@\w\-]{{1,72}}:\s*)+<\/p>", flags=re.A)
emoji_regex2 = re.compile(f'(?|[^`]*`))', flags=re.A)
-marseyfx_emoji_regex = re.compile(':[^\s].{0,98}?[^\\\\]:', flags=re.A)
+marseyfx_emoji_regex = re.compile(':[\w#!].{0,98}?[^\\\\]:', flags=re.A)
snappy_url_regex = re.compile('(.+?)<\/a>', flags=re.A)
diff --git a/files/helpers/sanitize.py b/files/helpers/sanitize.py
index eb2b749b3..9c5d26b41 100644
--- a/files/helpers/sanitize.py
+++ b/files/helpers/sanitize.py
@@ -8,6 +8,7 @@ from typing_extensions import deprecated
from urllib.parse import parse_qs, urlparse, unquote, ParseResult, urlencode, urlunparse
import time
from files.helpers.marseyfx.parser import parse_emoji
+from files.helpers.marseyfx.tokenizer import Tokenizer
from sqlalchemy.sql import func
@@ -129,7 +130,7 @@ def build_url_re(tlds, protocols):
"""
return re.compile(
r"""\(*# Match any opening parentheses.
- \b(?"]*)?
# /path/zz (excluding "unsafe" chars from RFC 1738,
@@ -274,18 +275,56 @@ def find_all_emote_endings(word):
return endings, word
-def render_emojis(markup: str):
- emojis_used = set()
+class RenderEmojisResult:
+ emojis_used: set[str]
+ heavy_count = 0
+ tags: list[str]
+
+ def __init__(self):
+ self.emojis_used = set()
+ self.tags = []
+
+ def update(self, other):
+ self.emojis_used |= other.emojis_used
+ self.heavy_count += other.heavy_count
+ self.tags.extend(other.tags)
+
+def render_emojis(markup: str, **kwargs):
+ result = RenderEmojisResult()
+ last_match_end = 0
+
+ golden = kwargs.get('golden', True)
+ permit_big = kwargs.get('permit_big', True)
for emoji_match in marseyfx_emoji_regex.finditer(markup):
- emoji_str = emoji_match.group()[1:-1] # Cut off colons
- success, emoji, _ = parse_emoji(emoji_str)
- if success:
- emojis_used.add(emoji.name)
- emoji_html = str(emoji.create_el())
- markup = markup.replace(emoji_match.group(), emoji_html)
+ previous_text = markup[last_match_end:emoji_match.start()]
+ if previous_text != '':
+ result.tags.append(previous_text)
+ last_match_end = emoji_match.end()
- return markup, emojis_used
+ emoji_str = emoji_match.group()[1:-1] # Cut off colons
+
+ tokenizer = Tokenizer(emoji_str)
+ success, emoji, _ = parse_emoji(tokenizer)
+ if success:
+ result.emojis_used.add(emoji.name)
+
+ if not permit_big:
+ emoji.is_big = False
+
+ emoji_html = emoji.create_el(tokenizer)
+ result.tags.append(emoji_html)
+
+ if len(tokenizer.errors) > 0:
+ soup = BeautifulSoup()
+ err_tag = soup.new_tag('pre', attrs={'class': 'marseyfx-error'})
+ nl = "\n "
+ err_tag.string = 'MarseyFX error:' + nl + nl.join(map(str,tokenizer.errors))
+ result.tags.append(err_tag)
+ #result.tags.append(f':{emoji_str}:')
+
+ result.tags.append(markup[last_match_end:])
+ return result
@deprecated("Use the new one")
def old_render_emoji(html, regexp, golden, emojis_used, b=False, is_title=False):
@@ -554,11 +593,6 @@ def sanitize(sanitized, golden=True, limit_pings=0, showmore=False, count_emojis
sanitized = video_sub_regex.sub(r'', sanitized)
sanitized = audio_sub_regex.sub(r'', sanitized)
- if count_emojis:
- for emoji in g.db.query(Emoji).filter(Emoji.submitter_id==None, Emoji.name.in_(emojis_used)):
- emoji.count += 1
- g.db.add(emoji)
-
sanitized = sanitized.replace('', '')
allowed_css_properties = allowed_styles.copy()
@@ -574,9 +608,8 @@ def sanitize(sanitized, golden=True, limit_pings=0, showmore=False, count_emojis
parse_email=False, url_re=url_re)]
).clean(sanitized)
- sanitized, emojis_used = render_emojis(sanitized)
- #doing this here cuz of the linkifyfilter right above it (therefore unifying all link processing logic)
+ # Doing this here because of the linkifyfilter right above it (thereby unifying all link-processing logic). NOTE(review): the rationale for this placement is unclear -- confirm.
soup = BeautifulSoup(sanitized, 'lxml')
has_transform = bool(soup.select('[style*=transform i]'))
@@ -660,9 +693,6 @@ def sanitize(sanitized, golden=True, limit_pings=0, showmore=False, count_emojis
html = f'{html}
'
sanitized = sanitized.replace(i.group(0), html)
- if '' not in sanitized and blackjack != "rules":
- sanitized = sanitized.replace('\n','')
-
if showmore:
# Insert a show more button if the text is too long or has too many paragraphs
CHARLIMIT = 3000
@@ -708,7 +738,9 @@ def filter_emojis_only(title, golden=True, count_emojis=False):
title = remove_cuniform(title)
- title, emojis_used = render_emojis(title) #old_render_emoji(title, emoji_regex2, golden, emojis_used, is_title=True)
+ res = render_emojis(title) #old_render_emoji(title, emoji_regex2, golden, emojis_used, is_title=True)
+
+ title = ''.join(map(str, res.tags))
if count_emojis:
for emoji in g.db.query(Emoji).filter(Emoji.submitter_id==None, Emoji.name.in_(emojis_used)):
--
2.34.1
From c5209495fd8de884c3c2f35d76904c2851894c0b Mon Sep 17 00:00:00 2001
From: transbitch <>
Date: Mon, 25 Sep 2023 01:17:09 -0400
Subject: [PATCH 8/8] Add slur/profanity replacements
---
files/helpers/slurs_and_profanities.py | 16 ++++++++++++++--
1 file changed, 14 insertions(+), 2 deletions(-)
diff --git a/files/helpers/slurs_and_profanities.py b/files/helpers/slurs_and_profanities.py
index 37cd3ef33..7e6c5546e 100644
--- a/files/helpers/slurs_and_profanities.py
+++ b/files/helpers/slurs_and_profanities.py
@@ -11,6 +11,12 @@ def replace_train_html(body):
body = body.replace(troon, ':marseytrain2:')
return body
+trans_valid_copypasta = """You are a real woman. You have no womb, you have no ovaries, you have no eggs, but that doesn't matter. You are a valid human who is trying to feel comfortable in her body.
+All the “validation” you get is pure. Behind your back people love you. Your parents are happy and proud of you, your friends laugh at your jokes behind closed doors, and boys love you, and girls envy you.
+Men absolutely love you. Trans folk who “pass” look ordinary and natural to a man. Your bone structure does not matter. Estradiol widens the hips.
+You will be happy. You will smile every single morning and tell yourself it’s going to be ok, and deep inside you feel the euphoria creeping up like a weed. It is what defines you, not the transphobes.
+Eventually, it’ll be perfect for you - you’ll come out, start HRT, get top surgery, and finally be your ideal self. Your parents will find you, happy and relieved that they finally have a happy daughter. They’ll congratulate you on your hard journey, and every passerby for the rest of eternity will know a woman is what you are."""
+
SLURS = {
"tranny": tranny,
"trannie": trannie,
@@ -27,12 +33,16 @@ SLURS = {
"(?