# MarseyWorld/files/helpers/sanitize.py

import functools
import random
import re
import signal
from functools import partial
from os import path
from urllib.parse import parse_qs, urlparse

import bleach
from bleach.css_sanitizer import CSSSanitizer
from bleach.linkifier import LinkifyFilter
from bs4 import BeautifulSoup
from mistletoe import markdown
from files.classes.domains import BannedDomain

from files.helpers.const import *
from files.helpers.const_stateful import *
from files.helpers.regex import *
from .get import *

# Top-level domains accepted by the linkifier URL pattern; this tuple is passed
# to build_url_re() below to produce url_re, so a bare "host.tld" in user text
# is only auto-linked when its TLD is listed here.
TLDS = ( # Original gTLDs and ccTLDs
	'ac','ad','ae','aero','af','ag','ai','al','am','an','ao','aq','ar','arpa','as','asia','at',
	'au','aw','ax','az','ba','bb','bd','be','bf','bg','bh','bi','biz','bj','bm','bn','bo','br',
	'bs','bt','bv','bw','by','bz','ca','cafe','cat','cc','cd','cf','cg','ch','ci','ck','cl',
	'cm','cn','co','com','coop','cr','cu','cv','cx','cy','cz','de','dj','dk','dm','do','dz','ec',
	'edu','ee','eg','er','es','et','eu','fi','fj','fk','fm','fo','fr','ga','gb','gd','ge','gf',
	'gg','gh','gi','gl','gm','gn','gov','gp','gq','gr','gs','gt','gu','gw','gy','hk','hm','hn',
	'hr','ht','hu','id','ie','il','im','in','info','int','io','iq','ir','is','it','je','jm','jo',
	'jobs','jp','ke','kg','kh','ki','km','kn','kp','kr','kw','ky','kz','la','lb','lc','li','lk',
	'lr','ls','lt','lu','lv','ly','ma','mc','md','me','mg','mh','mil','mk','ml','mm','mn','mo',
	'mobi','mp','mq','mr','ms','mt','mu','museum','mv','mw','mx','my','mz','na','name',
	'nc','ne','net','nf','ng','ni','nl','no','np','nr','nu','nz','om','org','pa','pe','pf','pg',
	'ph','pk','pl','pm','pn','post','pr','pro','ps','pt','pw','py','qa','re','ro','rs','ru','rw',
	'sa','sb','sc','sd','se','sg','sh','si','sj','sk','sl','sm','sn','so','social','sr','ss','st',
	'su','sv','sx','sy','sz','tc','td','tel','tf','tg','th','tj','tk','tl','tm','tn','to','tp',
	'tr','travel','tt','tv','tw','tz','ua','ug','uk','us','uy','uz','va','vc','ve','vg','vi','vn',
	'vu','wf','ws','xn','xxx','ye','yt','yu','za','zm','zw',
	# New gTLDs
	'app','cleaning','club','dev','farm','florist','fun','gay','lgbt','life','lol',
	'moe','mom','monster','new','news','online','pics','press','pub','site',
	'vip','win','world','wtf','xyz','video','host','art',
	)

# HTML tags that survive sanitization; handed to bleach.Cleaner(tags=...) in sanitize().
allowed_tags = ('b','blockquote','br','code','del','em','h1','h2','h3','h4','h5','h6','hr','i',
	'li','ol','p','pre','strong','sub','sup','table','tbody','th','thead','td','tr','ul',
	'marquee','a','span','ruby','rp','rt','spoiler','img','lite-youtube','video','audio','g')

# CSS properties kept inside style="" attributes; handed to CSSSanitizer in sanitize().
allowed_styles = ['color', 'background-color', 'font-weight', 'text-align', 'filter',]

def allowed_attributes(tag, name, value):
	"""Attribute filter used as ``attributes=`` for the bleach cleaner in sanitize().

	Args:
		tag: name of the element the attribute sits on.
		name: attribute name.
		value: attribute value as a string.

	Returns:
		True when the attribute may be kept on the element, False otherwise.
		Tags without a dedicated branch keep nothing except ``style``.
	"""

	# style is accepted on every tag here; its contents are filtered separately
	# by the CSSSanitizer that sanitize() passes to the cleaner.
	if name == 'style': return True

	if tag == 'marquee':
		if name in ['direction', 'behavior', 'scrollamount']: return True
		if name in {'height', 'width'}:
			# Accept plain integers or "<int>px"; anything unparsable is rejected.
			try: value = int(value.replace('px', ''))
			except (AttributeError, TypeError, ValueError): return False
			if 0 < value <= 250: return True
		return False

	if tag == 'a':
		# Backslashes and punycode ("xn--") hosts are refused outright.
		if name == 'href' and '\\' not in value and 'xn--' not in value:
			return True
		if name == 'rel' and value == 'nofollow noopener': return True
		if name == 'target' and value == '_blank': return True
		return False

	if tag == 'img':
		if name in ['src','data-src']: return is_safe_url(value)
		if name == 'loading' and value == 'lazy': return True
		if name == 'data-bs-toggle' and value == 'tooltip': return True
		# Valueless marker attributes emitted by render_emoji.
		if name in ['g','b','glow'] and not value: return True
		if name in ['alt','title']: return True
		return False

	if tag == 'lite-youtube':
		if name == 'params' and value.startswith('autoplay=1&modestbranding=1'): return True
		if name == 'videoid': return True
		return False

	if tag == 'video':
		if name == 'controls' and value == '': return True
		if name == 'preload' and value == 'none': return True
		if name == 'src': return is_safe_url(value)
		return False

	if tag == 'audio':
		if name == 'src': return is_safe_url(value)
		if name == 'controls' and value == '': return True
		if name == 'preload' and value == 'none': return True
		return False

	if tag == 'p':
		if name == 'class' and value == 'mb-0': return True
		return False

	if tag == 'span':
		if name == 'data-bs-toggle' and value == 'tooltip': return True
		if name == 'title': return True
		if name == 'alt': return True
		return False

	# Every other tag: deny explicitly instead of falling off the end with None.
	return False


def build_url_re(tlds, protocols):
	"""Compile the URL-matching pattern handed to the linkifier.

	``protocols`` and ``tlds`` are each sorted and joined with ``|`` to form the
	scheme and top-level-domain alternations of the pattern. The result is
	compiled case-insensitively in verbose, unicode mode and is suitable for
	``LinkifyFilter(url_re=...)``.
	"""
	scheme_alternation = "|".join(sorted(protocols))
	tld_alternation = "|".join(sorted(tlds))

	# Verbose-mode template: {0} = schemes, {1} = TLDs; doubled braces are literal.
	template = r"""\(*# Match any opening parentheses.
		\b(?<![@.])(?:(?:{0}):/{{0,3}}(?:(?:\w+:)?\w+@)?)?# http://
		([\w-]+\.)+(?:{1})(?:\:[0-9]+)?(?!\.\w)\b# xx.yy.tld(:##)?
		(?:[/?][^#\s\{{\}}\|\\\^\[\]`<>"]*)?
			# /path/zz (excluding "unsafe" chars from RFC 1738,
			# except for ~, which happens in practice)
		(?:\#[^#\s\|\\\^\[\]`<>"]*)?
			# #hash (excluding "unsafe" chars from RFC 1738,
			# except for ~, which happens in practice)
		"""

	compile_flags = re.IGNORECASE | re.VERBOSE | re.UNICODE
	return re.compile(template.format(scheme_alternation, tld_alternation), compile_flags)

# Module-level linkifier pattern: only http/https URLs whose TLD is listed in TLDS.
url_re = build_url_re(tlds=TLDS, protocols=['http', 'https'])

def callback(attrs, new=False):
	"""Linkify callback: neutralise unsafe hrefs and mark off-site links.

	Returns None when ``attrs`` carries no href. When the href contains a
	backslash or is not matched in full by ``ascii_only_regex``, the href is
	removed and its text is exposed as ``_text`` instead. Otherwise links that
	neither start with "/" nor with ``SITE_FULL`` + "/" receive
	target="_blank" and rel="nofollow noopener". ``attrs`` is mutated in place
	and returned.
	"""
	href_key = (None, "href")
	if href_key not in attrs:
		# Incorrect <a> tag
		return None

	link = attrs[href_key]

	# A "\" right after "/" makes most browsers drop the site hostname, which
	# allows host injection past the check, e.g. <a href="/\google.com">cool</a>
	looks_malicious = "\\" in link or not ascii_only_regex.fullmatch(link)
	if looks_malicious:
		attrs["_text"] = link # show the raw payload as plain text
		del attrs[href_key] # make it unclickable
		return attrs

	is_internal = link.startswith('/') or link.startswith(f'{SITE_FULL}/')
	if not is_internal:
		attrs[(None, "target")] = "_blank"
		attrs[(None, "rel")] = "nofollow noopener"

	return attrs


def render_emoji(html, regexp, golden, marseys_used, b=False):
	"""Replace emoji tokens in ``html`` with their ``<img>`` markup.

	Args:
		html: text to scan.
		regexp: compiled pattern; group(0) is the full token to replace and
			group(1) the emoji name (may carry ``!``/``#`` modifiers).
		golden: when true (and at most 20 matches were found) marsey emojis get
			a small random chance of a ``g`` or ``glow`` attribute.
		marseys_used: set that is updated in place with every emoji name that
			was actually rendered.
		b: when true, a valueless ``b`` attribute is added to each image.

	Returns:
		``html`` with every renderable token substituted. Tokens whose image
		file does not exist (or whose ``@user`` cannot be found) are left as is.
	"""
	emojis = list(regexp.finditer(html))
	captured = set()

	# Image templates; the "pat" variant omits the tooltip because the wrapping
	# <span> carries it instead.
	emoji_partial_pat = '<img loading="lazy" alt=":{0}:" src="{1}"{2}>'
	emoji_partial = '<img loading="lazy" data-bs-toggle="tooltip" alt=":{0}:" title=":{0}:" src="{1}"{2}>'

	for i in emojis:
		token = i.group(0)
		# Each distinct token is substituted once for the whole document.
		if token in captured: continue
		captured.add(token)

		emoji = i.group(1).lower()
		attrs = ''
		if b: attrs += ' b'
		if golden and len(emojis) <= 20 and ('marsey' in emoji or emoji in marseys_const2):
			if random.random() < 0.0025: attrs += ' g'
			elif random.random() < 0.00125: attrs += ' glow'

		# ``old`` keeps the modifiers for alt/title text; ``emoji`` is the bare name.
		old = emoji
		emoji = emoji.replace('!','').replace('#','')
		if emoji == 'marseyrandom': emoji = random.choice(marseys_const2)

		emoji_html = None

		if emoji.endswith('pat') and emoji != 'marseyunpettablepat':
			# Strip only the trailing "pat" so the file that is checked is the
			# same file the <img> src points at (names such as
			# "marseypatriotpat" contain "pat" more than once).
			base = emoji[:-3]
			if path.isfile(f"files/assets/images/emojis/{base}.webp"):
				emoji_html = f'<span data-bs-toggle="tooltip" alt=":{old}:" title=":{old}:"><img src="/i/hand.webp">{emoji_partial_pat.format(old, f"/e/{base}.webp", attrs)}</span>'
			elif emoji.startswith('@'):
				if u := get_user(emoji[1:-3], graceful=True):
					emoji_html = f'<span data-bs-toggle="tooltip" alt=":{old}:" title=":{old}:"><img src="/i/hand.webp">{emoji_partial_pat.format(old, f"/pp/{u.id}", attrs)}</span>'
		elif path.isfile(f'files/assets/images/emojis/{emoji}.webp'):
			emoji_html = emoji_partial.format(old, f'/e/{emoji}.webp', attrs)

		if emoji_html:
			marseys_used.add(emoji)
			# Match the token literally (re.escape) and insert the markup
			# verbatim (callable replacement, so no backslash/group expansion).
			# The lookbehind skips tokens sitting right after a double quote,
			# i.e. the alt/title values that were just generated.
			html = re.sub('(?<!")' + re.escape(token), lambda _m, _repl=emoji_html: _repl, html)
	return html


def with_sigalrm_timeout(timeout: int):
	"""Decorator factory: abort the wrapped call with an exception after ``timeout`` seconds.

	Uses SIGALRM, so it relies on ``signal.signal``/``signal.alarm`` being
	usable where the wrapped function is called. When the alarm fires, the
	handler prints "Timeout!" and raises ``Exception("Timeout")`` inside the
	running call. The alarm is always cancelled and the SIGALRM handler that
	was installed before the call is put back, whether the call returns or
	raises.
	"""

	# while trying to test this using time.sleep I discovered that gunicorn does in fact do some
	# async so if we timeout on that (or on a db op) then the process is crashed without returning
	# a proper 500 error. Oh well.
	def sig_handler(signum, frame):
		print("Timeout!", flush=True)
		raise Exception("Timeout")

	def inner(func):
		@functools.wraps(func)
		def wrapped(*args, **kwargs):
			# signal.signal returns the handler it replaces; keep it for restoring.
			previous_handler = signal.signal(signal.SIGALRM, sig_handler)
			signal.alarm(timeout)
			try:
				return func(*args, **kwargs)
			finally:
				# Cancel first so the alarm cannot fire into the restored handler.
				signal.alarm(0)
				# None means the old handler was not installed from Python and
				# cannot be passed back to signal.signal.
				if previous_handler is not None:
					signal.signal(signal.SIGALRM, previous_handler)
		return wrapped
	return inner


def sanitize_raw_title(sanitized:Optional[str]) -> str:
	"""Clean a raw title: drop U+200E, U+200B, U+FEFF and line breaks, trim, then cap at POST_TITLE_LENGTH_LIMIT."""
	if not sanitized:
		return ""
	# One pass that deletes the invisible characters plus CR and LF.
	dropped = str.maketrans('', '', '\u200e\u200b\ufeff\r\n')
	cleaned = sanitized.translate(dropped).strip()
	return cleaned[:POST_TITLE_LENGTH_LIMIT]

def sanitize_raw_body(sanitized:Optional[str], is_post:bool) -> str:
	"""Clean a raw body and cap its length.

	Removes whatever ``html_comment_regex`` matches, deletes U+200E, U+200B and
	U+FEFF, converts CRLF to LF and trims the result. The cap is
	POST_BODY_LENGTH_LIMIT when ``is_post`` is true, COMMENT_BODY_LENGTH_LIMIT
	otherwise. Empty or None input yields "".
	"""
	if not sanitized:
		return ""
	without_comments = html_comment_regex.sub('', sanitized)
	invisible = str.maketrans('', '', '\u200e\u200b\ufeff')
	# CRLF is normalised only after the invisible characters are gone.
	body = without_comments.translate(invisible).replace("\r\n", "\n").strip()
	if is_post:
		limit = POST_BODY_LENGTH_LIMIT
	else:
		limit = COMMENT_BODY_LENGTH_LIMIT
	return body[:limit]


def sanitize_settings_text(sanitized:Optional[str], max_length:Optional[int]=None) -> str:
	"""Clean a single-line settings value.

	Deletes U+200E, U+200B, U+FEFF, CR and LF, trims surrounding whitespace and,
	when ``max_length`` is truthy, truncates to that many characters. Empty or
	None input yields "".
	"""
	if not sanitized:
		return ""
	removed = str.maketrans('', '', '\u200e\u200b\ufeff\r\n')
	text = sanitized.translate(removed).strip()
	return text[:max_length] if max_length else text


@with_sigalrm_timeout(5)
def sanitize(sanitized, golden=True, limit_pings=0, showmore=True, count_marseys=False, torture=False):
	"""Turn user-submitted markdown into the sanitized HTML that gets stored/served.

	The steps are order-dependent: regex pre-processing, markdown rendering,
	mention/image/link rewriting, emoji rendering, media embeds, the bleach
	clean-up pass, and finally the banned-domain check.

	Args:
		sanitized: raw user text.
		golden: forwarded to render_emoji; forced to False once more than 20
			emoji matches are found.
		limit_pings: when truthy, the request is aborted with 406 if more
			distinct names than this are mentioned, unless ``g.v`` has at
			least the POST_COMMENT_INFINITE_PINGS admin level.
		showmore: when true and the result is longer than 3500 characters,
			``showmore_regex`` is applied once to insert the SHOW MORE button.
		count_marseys: when true, ``count`` is incremented on every Marsey row
			with no submitter whose name was rendered.
		torture: when true, the text is passed through ``torture_ap`` with
			``g.v.username`` and a random emoji is appended.

	Returns:
		The sanitized HTML, stripped of surrounding whitespace.

	Aborts with 403 when a link starts with a banned domain. The whole call is
	wrapped by ``with_sigalrm_timeout(5)``.
	"""
	sanitized = sanitized.strip()

	sanitized = utm_regex.sub('', sanitized)
	sanitized = utm_regex2.sub('', sanitized)

	if torture:
		sanitized = torture_ap(sanitized, g.v.username)
		emoji = random.choice(['trumpjaktalking', 'reposthorse'])
		sanitized += f'\n:#{emoji}:'

	sanitized = normalize_url(sanitized)

	# Leave line feeds alone when the text contains code blocks.
	if '```' not in sanitized and '<pre>' not in sanitized:
		sanitized = linefeeds_regex.sub(r'\1\n\n\2', sanitized)

	sanitized = greentext_regex.sub(r'\1<g>\>\2</g>', sanitized)

	# Image matches become markdown images before the markdown pass runs.
	sanitized = image_regex.sub(r'\1![](\2)\5', sanitized)

	sanitized = image_check_regex.sub(r'\1', sanitized)

	sanitized = link_fix_regex.sub(r'\1https://\2', sanitized)

	if FEATURES['MARKUP_COMMANDS']:
		sanitized = command_regex.sub(command_regex_matcher, sanitized)

	sanitized = markdown(sanitized)

	sanitized = strikethrough_regex.sub(r'\1<del>\2</del>', sanitized)

	# replacing zero width characters, overlines, fake colons
	sanitized = sanitized.replace('\u200e','').replace('\u200b','').replace("\ufeff", "").replace("\u033f","").replace("\u0589", ":")

	sanitized = reddit_regex.sub(r'\1<a href="https://old.reddit.com/\2" rel="nofollow noopener" target="_blank">/\2</a>', sanitized)
	sanitized = sub_regex.sub(r'\1<a href="/\2">/\2</a>', sanitized)

	# g.v may be missing; every use of v below has to tolerate None.
	v = getattr(g, 'v', None)

	names = set(m.group(2) for m in mention_regex.finditer(sanitized))
	# Only a known user with the required admin level is exempt from the ping limit.
	if limit_pings and len(names) > limit_pings and not (v and v.admin_level >= PERMS['POST_COMMENT_INFINITE_PINGS']): abort(406)
	users_list = get_users(names, graceful=True)
	# Lookup by lower-cased current and original usernames.
	users_dict = {}
	for u in users_list:
		users_dict[u.username.lower()] = u
		if u.original_username:
			users_dict[u.original_username.lower()] = u

	def replacer(m):
		u = users_dict.get(m.group(2).lower())
		if not u:
			# Unknown name: leave the mention text untouched.
			return m.group(0)
		return f'{m.group(1)}<a href="/id/{u.id}"><img loading="lazy" src="/pp/{u.id}">@{u.username}</a>'

	sanitized = mention_regex.sub(replacer, sanitized)

	soup = BeautifulSoup(sanitized, 'lxml')

	for tag in soup.find_all("img"):
		# Profile pictures inserted by the mention replacer are left alone.
		if tag.get("src") and not tag["src"].startswith('/pp/'):
			if not is_safe_url(tag["src"]):
				# Unsafe image source: show a plain external link instead.
				a = soup.new_tag("a", href=tag["src"], rel="nofollow noopener", target="_blank")
				a.string = tag["src"]
				tag.replace_with(a)
				continue

			# Lazy loading: the real URL moves to data-src, src becomes a placeholder.
			tag["loading"] = "lazy"
			tag["data-src"] = tag["src"]
			tag["src"] = "/i/l.webp"
			tag['alt'] = f'![]({tag["data-src"]})'

			# Wrap images that are not already inside a link in one pointing at the image.
			if tag.parent.name != 'a':
				a = soup.new_tag("a", href=tag["data-src"])
				if not is_site_url(a["href"]):
					a["rel"] = "nofollow noopener"
					a["target"] = "_blank"
				tag = tag.replace_with(a)
				a.append(tag)

	for tag in soup.find_all("a"):
		if not tag.contents or not str(tag.contents[0]).strip():
			# Link without visible leading content: drop it and move on.
			tag.extract()
			continue
		if tag.get("href") and fishylinks_regex.fullmatch(str(tag.string)):
			# Link text matched by fishylinks_regex is replaced with the real target.
			tag.string = tag["href"]


	sanitized = str(soup)

	sanitized = spoiler_regex.sub(r'<spoiler>\1</spoiler>', sanitized)

	marseys_used = set()

	emojis = list(emoji_regex.finditer(sanitized))
	if len(emojis) > 20: golden = False

	# First pass: whole emoji_regex matches, rendered with the "b" attribute.
	captured = set()
	for i in emojis:
		old = i.group(0)
		if old in captured: continue
		captured.add(old)

		if 'marseylong1' in old or 'marseylong2' in old or 'marseyllama1' in old or 'marseyllama2' in old: new = old.lower().replace(">", " class='mb-0'>")
		else: new = old.lower()

		new = render_emoji(new, emoji_regex2, golden, marseys_used, True)

		sanitized = sanitized.replace(old, new)

	# Second pass: remaining emoji_regex2 matches anywhere in the text.
	emojis = list(emoji_regex2.finditer(sanitized))
	if len(emojis) > 20: golden = False

	sanitized = render_emoji(sanitized, emoji_regex2, golden, marseys_used)

	sanitized = sanitized.replace('&amp;','&')

	captured = set()
	for i in youtube_regex.finditer(sanitized):
		if i.group(0) in captured: continue
		captured.add(i.group(0))

		# Start offset comes from ?t= or ?start=; a trailing "s" unit is dropped.
		params = parse_qs(urlparse(i.group(2)).query, keep_blank_values=True)
		t = params.get('t', params.get('start', [0]))[0]
		if isinstance(t, str): t = t.replace('s','')

		htmlsource = f'{i.group(1)}<lite-youtube videoid="{i.group(3)}" params="autoplay=1&modestbranding=1'
		if t:
			# A non-numeric offset is ignored rather than failing the whole post.
			try: htmlsource += f'&start={int(t)}'
			except ValueError: pass
		htmlsource += '"></lite-youtube>'

		sanitized = sanitized.replace(i.group(0), htmlsource)

	sanitized = video_sub_regex.sub(r'\1<video controls preload="none" src="\2"></video>', sanitized)
	sanitized = audio_sub_regex.sub(r'\1<audio controls preload="none" src="\2"></audio>', sanitized)

	if count_marseys:
		for marsey in g.db.query(Marsey).filter(Marsey.submitter_id==None, Marsey.name.in_(marseys_used)).all():
			marsey.count += 1
			g.db.add(marsey)

	sanitized = sanitized.replace('<p></p>', '')
	# Remove the document wrapper left by the lxml round-trip.
	sanitized = sanitized.replace('<html><body>','').replace('</body></html>','')

	css_sanitizer = CSSSanitizer(allowed_css_properties=allowed_styles)
	sanitized = bleach.Cleaner(tags=allowed_tags,
								attributes=allowed_attributes,
								protocols=['http', 'https'],
								css_sanitizer=css_sanitizer,
								filters=[partial(LinkifyFilter, skip_tags=["pre"],
									parse_email=False, callbacks=[callback], url_re=url_re)]
								).clean(sanitized)

	soup = BeautifulSoup(sanitized, 'lxml')

	links = soup.find_all("a")

	# Registered domain + path of every link, lower-cased, for the ban check.
	domain_list = set()

	for link in links:
		href = link.get("href")
		if not href: continue
		url = urlparse(href)
		d = tldextract.extract(href).registered_domain + url.path
		domain_list.add(d.lower())

	banned_domains = g.db.query(BannedDomain).all()
	for x in banned_domains:
		for y in domain_list:
			if y.startswith(x.domain):
				abort(403, description=f'Remove the banned link "{x.domain}" and try again!\nReason for link ban: "{x.reason}"')

	if '<pre>' not in sanitized:
		sanitized = sanitized.replace('\n','')

	if showmore and len(sanitized) > 3500:
		sanitized = showmore_regex.sub(r'\1<p><button class="showmore" onclick="showmore()">SHOW MORE</button></p><d class="d-none">\2</d>', sanitized, count=1)

	return sanitized.strip()


def allowed_attributes_emojis(tag, name, value):
	"""Attribute filter for the emoji-only cleaner used by filter_emojis_only.

	Only ``img`` and ``span`` may keep attributes:
	  * img: a ``src`` that starts with "/" and has no backslash,
	    ``loading="lazy"``, ``data-bs-toggle="tooltip"``, valueless
	    ``g``/``glow`` markers, and any ``alt``/``title``.
	  * span: ``data-bs-toggle="tooltip"`` and any ``title``/``alt``.
	Everything else is rejected.
	"""
	if tag == 'img':
		if name == 'src':
			return value.startswith('/') and '\\' not in value
		if name == 'loading':
			return value == 'lazy'
		if name == 'data-bs-toggle':
			return value == 'tooltip'
		if name in ('g', 'glow'):
			return not value
		return name in ('alt', 'title')

	if tag == 'span':
		if name == 'data-bs-toggle':
			return value == 'tooltip'
		return name in ('title', 'alt')

	return False


@with_sigalrm_timeout(1)
def filter_emojis_only(title, golden=True, count_marseys=False, graceful=False, torture=False):
	"""Render a title as HTML in which only emojis and strikethrough are markup.

	The title is HTML-escaped first, so the only markup in the result is what
	this function generates itself: emoji ``<img>``/``<span>`` elements and
	``<del>``.

	Args:
		title: raw title text.
		golden: forwarded to render_emoji.
		count_marseys: when true, ``count`` is incremented on every Marsey row
			with no submitter whose name was rendered.
		graceful: when true, an over-long result is returned instead of
			aborting the request.
		torture: when true, the title is passed through ``torture_ap`` with
			``g.v.username``.

	Returns:
		The rendered title. Aborts with 400 when it is longer than
		POST_TITLE_HTML_LENGTH_LIMIT and ``graceful`` is false.
	"""
	title = title.strip()

	if torture:
		title = torture_ap(title, g.v.username)

	# Invisible characters are written as escapes (the same ones the other
	# sanitize_* helpers strip) so they cannot be lost when the file is edited.
	# "&" must be escaped before "<", ">" and the quotes, so the entities
	# produced here are not escaped a second time.
	title = title.replace('\u200e','').replace('\u200b','').replace("\ufeff", "").replace("𒐪","").replace("\n", "").replace("\r", "").replace("\t", "").replace("&", "&amp;").replace('<','&lt;').replace('>','&gt;').replace('"', '&quot;').replace("'", "&#039;").strip()

	marseys_used = set()

	title = render_emoji(title, emoji_regex3, golden, marseys_used)

	if count_marseys:
		for marsey in g.db.query(Marsey).filter(Marsey.submitter_id==None, Marsey.name.in_(marseys_used)).all():
			marsey.count += 1
			g.db.add(marsey)

	title = strikethrough_regex.sub(r'\1<del>\2</del>', title)

	title = bleach.clean(title, tags=['img','del','span'], attributes=allowed_attributes_emojis, protocols=['http','https']).replace('\n','').strip()

	if len(title) > POST_TITLE_HTML_LENGTH_LIMIT and not graceful: abort(400)
	else: return title

def normalize_url(url):
	"""Rewrite known URL variants inside ``url`` to one canonical form.

	Applies ``reddit_domain_regex`` first, then a fixed, ordered list of literal
	substring rewrites, and finally ``imgur_regex`` and ``giphy_regex``. The
	input may be a whole text containing several URLs; every occurrence is
	rewritten.
	"""
	url = reddit_domain_regex.sub(r'\1https://old.reddit.com/\3/', url)

	# Order matters: several entries act on the output of earlier ones (for
	# example the "/e/e/" entry undoes a doubled streamable prefix).
	literal_rewrites = (
		("https://youtu.be/", "https://youtube.com/watch?v="),
		("https://music.youtube.com/watch?v=", "https://youtube.com/watch?v="),
		("https://www.youtube.com", "https://youtube.com"),
		("https://youtube.com/shorts/", "https://youtube.com/watch?v="),
		("https://youtube.com/v/", "https://youtube.com/watch?v="),
		("https://mobile.twitter.com", "https://twitter.com"),
		("https://m.facebook.com", "https://facebook.com"),
		("https://m.wikipedia.org", "https://wikipedia.org"),
		("https://m.youtube.com", "https://youtube.com"),
		("https://www.twitter.com", "https://twitter.com"),
		("https://www.instagram.com", "https://instagram.com"),
		("https://www.tiktok.com", "https://tiktok.com"),
		("https://www.streamable.com", "https://streamable.com"),
		("https://streamable.com/", "https://streamable.com/e/"),
		("https://streamable.com/e/e/", "https://streamable.com/e/"),
		("https://search.marsey.cat/#", "https://camas.unddit.com/#"),
		("https://imgur.com/", "https://i.imgur.com/"),
		("https://nitter.net/", "https://twitter.com/"),
		("https://nitter.42l.fr/", "https://twitter.com/"),
		("https://nitter.lacontrevoie.fr/", "https://twitter.com/"),
	)
	for variant, canonical in literal_rewrites:
		url = url.replace(variant, canonical)

	url = imgur_regex.sub(r'\1_d.webp?maxwidth=9999&fidelity=grand', url)
	url = giphy_regex.sub(r'\1.webp', url)

	return url

def validate_css(css):
	"""Check user-supplied CSS for disallowed constructs.

	Returns:
		``(True, "")`` when the CSS is acceptable, otherwise ``(False, message)``
		where ``message`` explains the rejection: either an ``@import`` rule is
		present, or a URL matched by ``css_url_regex`` fails ``is_safe_url``.
	"""
	# CSS at-keywords are ASCII case-insensitive, so "@IMPORT" is just as much
	# an import as "@import"; compare on a lower-cased copy.
	# NOTE(review): CSS escape sequences inside the keyword are not decoded
	# here - confirm whether they need to be rejected as well.
	if '@import' in css.lower():
		return False, "@import statements are not allowed!"

	for i in css_url_regex.finditer(css):
		url = i.group(1)
		if not is_safe_url(url):
			domain = tldextract.extract(url).registered_domain
			return False, f"The domain '{domain}' is not allowed, please use one of these domains\n\n{approved_embed_hosts}."

	return True, ""
-												fix exceptions in sanitize leaving SIGALRM on

											
										
										
											2022-07-05 22:11:45 +00:00
+								import functools
-												[DO NOT MERGE] import detanglation (#442)

* move Base definition to files.classes.__init__.py

* fix ImportError

* move userpage listing to users.py

* don't import the app from classes

* consts: set default values to avoid crashes
consts: warn if the secret key is the default config value

* card view: sneed (user db schema)

* cloudflare: use DEFAULT_CONFIG_VALUE

* const: set default values

* decouple media.py from __main__

* pass database to avoid imports

* import cleanup and import request not in const, but in the requests mega import

* move asset_submissions site check to __init__

* asset submissions feature flag

* flag

* g.is_tor

* don't import request where it's not needed

* i think this is fine

* mail: move to own routes and helper

* wrappers

* required wrappers move

* unfuck wrappers a bit

* move snappy quotes and marseys to stateful consts

* marsify

* :pepodrool:

* fix missing import

* import cache

* ...and settings.py

* and static.py

* static needs cache

* route

* lmao all of the jinja shit was in feeds.py amazing

* classes should only import what they need from flask

* import Response

* hdjbjdhbhjf

* ...

* dfdfdfdf

* make get a non-required import

* isort imports (mostly)

* but actually

* configs

* reload config on import

* fgfgfgfg

* config

* config

* initialize snappy and test

* cookie of doom debug

* edfjnkf

* xikscdfd

* debug config

* set session cookie domain, i think this fixes the can't login bug

* sdfbgnhvfdsghbnjfbdvvfghnn

* hrsfxgf

* dump the entire config on a request

* kyskyskyskyskyskyskyskyskys

* duifhdskfjdfd

* dfdfdfdfdfdfdfdfdfdfdfdf

* dfdfdfdf

* import all of the consts because fuck it

* 😭

* dfdfdfdfdfdfsdasdf

* print the entire session

* rffdfdfjkfksj

* fgbhffh

* not the secret keys

* minor bug fixes

* be helpful in the warning

* gfgfgfg

* move warning lower

* isort main imports (i hope this doesn't fuck something up)

* test

* session cookie domain redux

* dfdfdfd

* try only importing Flask

* formkeys fix

* y

* :pepodrool:

* route helper

* remove before flight

* dfdfdfdfdf

* isort classes

* isort helpers

* move check_for_alts to routehelpers and also sort imports and get rid of unused ones

* that previous commit but actually

* readd the cache in a dozen places they were implicitly imported

* use g.is_tor instead of request.headers. bla bla bla

* upgrade streamers to their own route file

* get rid of unused imports in __main__

* fgfgf

* don't pull in the entire ORM where we don't need it

* features

* explicit imports for the get helper

* explicit imports for the get helper redux

* testing allroutes

* remove unused import

* decouple flask from classes

* syntax fix also remember these have side fx for some reason (why?)

* move side effects out of the class

* posts

* testing on devrama

* settings

* reloading

* settingssdsdsds

* streamer features

* site settings

* testing settings on devrama

* import

* fix modlog

* remove debug stuff

* revert commit 67275b21ab6e2f2520819e84d10bfc1c746a15b6

* archiveorg to _archiveorg

* skhudkfkjfd

* fix cron for PCM

* fix bugs that snekky wants me to

* Fix call to realbody passing db, standardize kwarg

* test

* import check_for_alts from the right place

* cloudflare

* testing on devrama

* fix cron i think

* shadow properly

* tasks

* Remove print which will surely be annoying in prod.

* v and create new session

* use files.classes

* make errors import little and fix rare 500 in /allow_nsfw

* Revert "use files.classes"

This reverts commit 98c10b876cf86ce058b7fb955cf1ec0bfb9996c6.

* pass v to media functions rather than using g

* fix

* dfdfdfdfd

* cleanup, py type checking is dumb so don't use it where it causes issues

* Fix some merge bugs, add DEFAULT_RATELIMIT to main.

* Fix imports on sqlalchemy expressions.

* `from random import random` is an error.

* Fix replies db param.

* errors: fix missing import

* fix rare 500: only send to GIFT_NOTIF_ID if it exists, and send them the right text

* Fix signup formkey.

* fix 2 500s

* propagate db to submissions

* fix replies

* dfdfdfdf

* Fix verifiedcolor.

* is_manual

* can't use getters outside of an app context

* don't attempt to do gumroad on sites where it's not enabled

* don't attempt to do gumroad on sites where it's unnecessary

* Revert "don't attempt to do gumroad on sites where it's not enabled"

This reverts commit 6f8a6331878655492dfaf1907b27f8be513c14d3.

* fix 500

* validate media type

Co-authored-by: TLSM <duolsm@outlook.com>
											
										
										
											2022-11-15 09:19:08 +00:00
+								import random
 								import re
 								import signal
 								from functools import partial
 								from os import path
 								from urllib.parse import parse_qs, urlparse
-												mn

											
										
										
											2022-05-04 23:09:46 +00:00
+								import bleach
-												Upgrade bleach to 5.0.0.

											
										
										
											2022-05-25 00:27:41 +00:00
+								from bleach.css_sanitizer import CSSSanitizer
-												fix camas.unddit.com

											
										
										
											2022-07-15 13:27:45 +00:00
+								from bleach.linkifier import LinkifyFilter
-												[DO NOT MERGE] import detanglation (#442)

* move Base definition to files.classes.__init__.py

* fix ImportError

* move userpage listing to users.py

* don't import the app from classes

* consts: set default values to avoid crashes
consts: warn if the secret key is the default config value

* card view: sneed (user db schema)

* cloudflare: use DEFAULT_CONFIG_VALUE

* const: set default values

* decouple media.py from __main__

* pass database to avoid imports

* import cleanup and import request not in const, but in the requests mega import

* move asset_submissions site check to __init__

* asset submissions feature flag

* flag

* g.is_tor

* don't import request where it's not needed

* i think this is fine

* mail: move to own routes and helper

* wrappers

* required wrappers move

* unfuck wrappers a bit

* move snappy quotes and marseys to stateful consts

* marsify

* :pepodrool:

* fix missing import

* import cache

* ...and settings.py

* and static.py

* static needs cache

* route

* lmao all of the jinja shit was in feeds.py amazing

* classes should only import what they need from flask

* import Response

* hdjbjdhbhjf

* ...

* dfdfdfdf

* make get a non-required import

* isort imports (mostly)

* but actually

* configs

* reload config on import

* fgfgfgfg

* config

* config

* initialize snappy and test

* cookie of doom debug

* edfjnkf

* xikscdfd

* debug config

* set session cookie domain, i think this fixes the can't login bug

* sdfbgnhvfdsghbnjfbdvvfghnn

* hrsfxgf

* dump the entire config on a request

* kyskyskyskyskyskyskyskyskys

* duifhdskfjdfd

* dfdfdfdfdfdfdfdfdfdfdfdf

* dfdfdfdf

* imoprt all of the consts beacuse fuck it

* 😭

* dfdfdfdfdfdfsdasdf

* print the entire session

* rffdfdfjkfksj

* fgbhffh

* not the secret keys

* minor bug fixes

* be helpful in the warning

* gfgfgfg

* move warning lower

* isort main imports (i hope this doesn't fuck something up)

* test

* session cookie domain redux

* dfdfdfd

* try only importing Flask

* formkeys fix

* y

* :pepodrool:

* route helper

* remove before flight

* dfdfdfdfdf

* isort classes

* isort helpers

* move check_for_alts to routehelpers and also sort imports and get rid of unused ones

* that previous commit but actkally

* readd the cache in a dozen places they were implicitly imported

* use g.is_tor instead of request.headers. bla bla bla

* upgrade streamers to their own route file

* get rid of unused imports in __main__

* fgfgf

* don't pull in the entire ORM where we don't need it

* features

* explicit imports for the get helper

* explicit imports for the get helper redux

* testing allroutes

* remove unused import

* decouple flask from classes

* syntax fix also remember these have side fx for some reason (why?)

* move side effects out of the class

* posts

* testing on devrama

* settings

* reloading

* settingssdsdsds

* streamer features

* site settings

* testing settings on devrama

* import

* fix modlog

* remove debug stuff

* revert commit 67275b21ab6e2f2520819e84d10bfc1c746a15b6

* archiveorg to _archiveorg

* skhudkfkjfd

* fix cron for PCM

* fix bugs that snekky wants me to

* Fix call to realbody passing db, standardize kwarg

* test

* import check_for_alts from the right place

* cloudflare

* testing on devrama

* fix cron i think

* shadow properly

* tasks

* Remove print which will surely be annoying in prod.

* v and create new session

* use files.classes

* make errors import little and fix rare 500 in /allow_nsfw

* Revert "use files.classes"

This reverts commit 98c10b876cf86ce058b7fb955cf1ec0bfb9996c6.

* pass v to media functions rather than using g

* fix

* dfdfdfdfd

* cleanup, py type checking is dumb so don't use it where it causes issues

* Fix some merge bugs, add DEFAULT_RATELIMIT to main.

* Fix imports on sqlalchemy expressions.

* `from random import random` is an error.

* Fix replies db param.

* errors: fix missing import

* fix rare 500: only send to GIFT_NOTIF_ID if it exists, and send them the right text

* Fix signup formkey.

* fix 2 500s

* propagate db to submissions

* fix replies

* dfdfdfdf

* Fix verifiedcolor.

* is_manual

* can't use getters outside of an app context

* don't attempt to do gumroad on sites where it's not enabled

* don't attempt to do gumraod on sites's where it's unnecessary

* Revert "don't attempt to do gumroad on sites where it's not enabled"

This reverts commit 6f8a6331878655492dfaf1907b27f8be513c14d3.

* fix 500

* validate media type

Co-authored-by: TLSM <duolsm@outlook.com>
											
										
										
											2022-11-15 09:19:08 +00:00
+								from bs4 import BeautifulSoup
-												mn

											
										
										
											2022-05-04 23:09:46 +00:00
+								from mistletoe import markdown
-												[DO NOT MERGE] import detanglation (#442)

* move Base definition to files.classes.__init__.py

* fix ImportError

* move userpage listing to users.py

* don't import the app from classes

* consts: set default values to avoid crashes
consts: warn if the secret key is the default config value

* card view: sneed (user db schema)

* cloudflare: use DEFAULT_CONFIG_VALUE

* const: set default values

* decouple media.py from __main__

* pass database to avoid imports

* import cleanup and import request not in const, but in the requests mega import

* move asset_submissions site check to __init__

* asset submissions feature flag

* flag

* g.is_tor

* don't import request where it's not needed

* i think this is fine

* mail: move to own routes and helper

* wrappers

* required wrappers move

* unfuck wrappers a bit

* move snappy quotes and marseys to stateful consts

* marsify

* :pepodrool:

* fix missing import

* import cache

* ...and settings.py

* and static.py

* static needs cache

* route

* lmao all of the jinja shit was in feeds.py amazing

* classes should only import what they need from flask

* import Response

* hdjbjdhbhjf

* ...

* dfdfdfdf

* make get a non-required import

* isort imports (mostly)

* but actually

* configs

* reload config on import

* fgfgfgfg

* config

* config

* initialize snappy and test

* cookie of doom debug

* edfjnkf

* xikscdfd

* debug config

* set session cookie domain, i think this fixes the can't login bug

* sdfbgnhvfdsghbnjfbdvvfghnn

* hrsfxgf

* dump the entire config on a request

* kyskyskyskyskyskyskyskyskys

* duifhdskfjdfd

* dfdfdfdfdfdfdfdfdfdfdfdf

* dfdfdfdf

* imoprt all of the consts beacuse fuck it

* 😭

* dfdfdfdfdfdfsdasdf

* print the entire session

* rffdfdfjkfksj

* fgbhffh

* not the secret keys

* minor bug fixes

* be helpful in the warning

* gfgfgfg

* move warning lower

* isort main imports (i hope this doesn't fuck something up)

* test

* session cookie domain redux

* dfdfdfd

* try only importing Flask

* formkeys fix

* y

* :pepodrool:

* route helper

* remove before flight

* dfdfdfdfdf

* isort classes

* isort helpers

* move check_for_alts to routehelpers and also sort imports and get rid of unused ones

* that previous commit but actkally

* readd the cache in a dozen places they were implicitly imported

* use g.is_tor instead of request.headers. bla bla bla

* upgrade streamers to their own route file

* get rid of unused imports in __main__

* fgfgf

* don't pull in the entire ORM where we don't need it

* features

* explicit imports for the get helper

* explicit imports for the get helper redux

* testing allroutes

* remove unused import

* decouple flask from classes

* syntax fix also remember these have side fx for some reason (why?)

* move side effects out of the class

* posts

* testing on devrama

* settings

* reloading

* settingssdsdsds

* streamer features

* site settings

* testing settings on devrama

* import

* fix modlog

* remove debug stuff

* revert commit 67275b21ab6e2f2520819e84d10bfc1c746a15b6

* archiveorg to _archiveorg

* skhudkfkjfd

* fix cron for PCM

* fix bugs that snekky wants me to

* Fix call to realbody passing db, standardize kwarg

* test

* import check_for_alts from the right place

* cloudflare

* testing on devrama

* fix cron i think

* shadow properly

* tasks

* Remove print which will surely be annoying in prod.

* v and create new session

* use files.classes

* make errors import little and fix rare 500 in /allow_nsfw

* Revert "use files.classes"

This reverts commit 98c10b876cf86ce058b7fb955cf1ec0bfb9996c6.

* pass v to media functions rather than using g

* fix

* dfdfdfdfd

* cleanup, py type checking is dumb so don't use it where it causes issues

* Fix some merge bugs, add DEFAULT_RATELIMIT to main.

* Fix imports on sqlalchemy expressions.

* `from random import random` is an error.

* Fix replies db param.

* errors: fix missing import

* fix rare 500: only send to GIFT_NOTIF_ID if it exists, and send them the right text

* Fix signup formkey.

* fix 2 500s

* propagate db to submissions

* fix replies

* dfdfdfdf

* Fix verifiedcolor.

* is_manual

* can't use getters outside of an app context

* don't attempt to do gumroad on sites where it's not enabled

* don't attempt to do gumroad on sites where it's unnecessary

* Revert "don't attempt to do gumroad on sites where it's not enabled"

This reverts commit 6f8a6331878655492dfaf1907b27f8be513c14d3.

* fix 500

* validate media type

Co-authored-by: TLSM <duolsm@outlook.com>
											
										
										
											2022-11-15 09:19:08 +00:00
+								from files.classes.domains import BannedDomain
 								from files.helpers.const import *
 								from files.helpers.const_stateful import *
 								from files.helpers.regex import *
 								from .get import *
-												mn

											
										
										
											2022-05-04 23:09:46 +00:00
-												go back to the old TLD system in sanitize

											
										
										
											2022-11-02 07:08:02 +00:00
# Top-level domains accepted when recognising bare URLs in text.
# Passed as `tlds` to build_url_re() to build the module-level `url_re`.
TLDS = ( # Original gTLDs and ccTLDs
	'ac','ad','ae','aero','af','ag','ai','al','am','an','ao','aq','ar','arpa','as','asia','at',
	'au','aw','ax','az','ba','bb','bd','be','bf','bg','bh','bi','biz','bj','bm','bn','bo','br',
	'bs','bt','bv','bw','by','bz','ca','cafe','cat','cc','cd','cf','cg','ch','ci','ck','cl',
	'cm','cn','co','com','coop','cr','cu','cv','cx','cy','cz','de','dj','dk','dm','do','dz','ec',
	'edu','ee','eg','er','es','et','eu','fi','fj','fk','fm','fo','fr','ga','gb','gd','ge','gf',
	'gg','gh','gi','gl','gm','gn','gov','gp','gq','gr','gs','gt','gu','gw','gy','hk','hm','hn',
	'hr','ht','hu','id','ie','il','im','in','info','int','io','iq','ir','is','it','je','jm','jo',
	'jobs','jp','ke','kg','kh','ki','km','kn','kp','kr','kw','ky','kz','la','lb','lc','li','lk',
	'lr','ls','lt','lu','lv','ly','ma','mc','md','me','mg','mh','mil','mk','ml','mm','mn','mo',
	'mobi','mp','mq','mr','ms','mt','mu','museum','mv','mw','mx','my','mz','na','name',
	'nc','ne','net','nf','ng','ni','nl','no','np','nr','nu','nz','om','org','pa','pe','pf','pg',
	'ph','pk','pl','pm','pn','post','pr','pro','ps','pt','pw','py','qa','re','ro','rs','ru','rw',
	'sa','sb','sc','sd','se','sg','sh','si','sj','sk','sl','sm','sn','so','social','sr','ss','st',
	'su','sv','sx','sy','sz','tc','td','tel','tf','tg','th','tj','tk','tl','tm','tn','to','tp',
	'tr','travel','tt','tv','tw','tz','ua','ug','uk','us','uy','uz','va','vc','ve','vg','vi','vn',
	'vu','wf','ws','xn','xxx','ye','yt','yu','za','zm','zw',
	# New gTLDs
	'app','cleaning','club','dev','farm','florist','fun','gay','lgbt','life','lol',
	'moe','mom','monster','new','news','online','pics','press','pub','site',
	'vip','win','world','wtf','xyz','video','host','art',
	)
-												mn

											
										
										
											2022-05-04 23:09:46 +00:00
-												sanitize: Add more neo-gTLDs to TLDs tuple-list.

Per https://rdrama.net/post/70341/-/1976650 added more gTLDs that
are actually desired by site users.

Also, hard wrapped the `TLDS` and `allowed_tags` tuple-lists at a
100char hard ruler for my sanity.

											
										
										
											2022-05-24 19:16:55 +00:00
# HTML tag names that user content may contain.
# NOTE(review): presumably handed to bleach as the tag whitelist; the call site
# is outside this part of the file, so confirm there.
allowed_tags = ('b','blockquote','br','code','del','em','h1','h2','h3','h4','h5','h6','hr','i',
	'li','ol','p','pre','strong','sub','sup','table','tbody','th','thead','td','tr','ul',
	'marquee','a','span','ruby','rp','rt','spoiler','img','lite-youtube','video','audio','g')
-												mn

											
										
										
											2022-05-04 23:09:46 +00:00
-												Revert "remove "filter" from allowed styles"

This reverts commit 14d929623e5729bf5490d9a91bf11b2466b9a3bd.

											
										
										
											2022-06-30 22:18:05 +00:00
# CSS property names permitted inside inline `style` attributes.
# NOTE(review): presumably passed to bleach's CSSSanitizer (imported at the top
# of the file); the usage is not visible here, so confirm at the call site.
allowed_styles = ['color', 'background-color', 'font-weight', 'text-align', 'filter',]
-												Upgrade bleach to 5.0.0.

											
										
										
											2022-05-25 00:27:41 +00:00
-												mn

											
										
										
											2022-05-04 23:09:46 +00:00
def allowed_attributes(tag, name, value):
	"""Decide whether attribute ``name`` with ``value`` may be kept on ``tag``.

	The ``(tag, name, value)`` signature matches what bleach expects from a
	callable attribute filter (the wiring itself is outside this function).

	Rules:
	* ``style`` is accepted on every tag.
	* Tags with a dedicated branch below only keep the attributes (and, where
	  checked, the exact values) listed in that branch.
	* Any attribute on any other tag is rejected.

	Returns True to keep the attribute. Every other path returns False, except
	the ``src``/``data-src`` checks, which return whatever ``is_safe_url`` gives.
	"""
	if name == 'style': return True

	if tag == 'marquee':
		if name in ['direction', 'behavior', 'scrollamount']: return True
		if name in {'height', 'width'}:
			# Only a plain integer (optionally written with "px") is accepted,
			# and it is capped at 250.
			try: value = int(value.replace('px', ''))
			except (AttributeError, ValueError): return False
			if 0 < value <= 250: return True
		return False

	if tag == 'a':
		# Backslashes and punycode ("xn--") hosts are refused in links.
		if name == 'href' and '\\' not in value and 'xn--' not in value:
			return True
		if name == 'rel' and value == 'nofollow noopener': return True
		if name == 'target' and value == '_blank': return True
		return False

	if tag == 'img':
		if name in ['src','data-src']: return is_safe_url(value)
		if name == 'loading' and value == 'lazy': return True
		if name == 'data-bs-toggle' and value == 'tooltip': return True
		# Valueless marker attributes that render_emoji puts on emoji images.
		if name in ['g','b','glow'] and not value: return True
		if name in ['alt','title']: return True
		return False

	if tag == 'lite-youtube':
		if name == 'params' and value.startswith('autoplay=1&modestbranding=1'): return True
		if name == 'videoid': return True
		return False

	if tag == 'video':
		if name == 'controls' and value == '': return True
		if name == 'preload' and value == 'none': return True
		if name == 'src': return is_safe_url(value)
		return False

	if tag == 'audio':
		if name == 'src': return is_safe_url(value)
		if name == 'controls' and value == '': return True
		if name == 'preload' and value == 'none': return True
		return False

	if tag == 'p':
		if name == 'class' and value == 'mb-0': return True
		return False

	if tag == 'span':
		if name == 'data-bs-toggle' and value == 'tooltip': return True
		if name == 'title': return True
		if name == 'alt': return True
		return False

	# Tags without a branch above keep no attributes (besides style).
	return False
-												go back to the old TLD system in sanitize

											
										
										
											2022-11-02 07:08:02 +00:00
def build_url_re(tlds, protocols):
	"""Compile the regex used to spot URLs in plain text.

	``tlds`` and ``protocols`` are iterables of strings; each is sorted and
	joined with ``|`` to form an alternation inside the pattern. The result
	is compiled case-insensitively in verbose mode, so the whitespace and
	``#`` comments inside the pattern text are not part of the match.

	The pattern has the same shape as the one bleach's linkifier builds,
	so the compiled object can be supplied as ``url_re`` to ``LinkifyFilter``.
	"""
	protocol_alternation = "|".join(sorted(protocols))
	tld_alternation = "|".join(sorted(tlds))

	pattern = r"""\(*# Match any opening parentheses.
		\b(?<![@.])(?:(?:{0}):/{{0,3}}(?:(?:\w+:)?\w+@)?)?# http://
		([\w-]+\.)+(?:{1})(?:\:[0-9]+)?(?!\.\w)\b# xx.yy.tld(:##)?
		(?:[/?][^#\s\{{\}}\|\\\^\[\]`<>"]*)?
			# /path/zz (excluding "unsafe" chars from RFC 1738,
			# except for ~, which happens in practice)
		(?:\#[^#\s\|\\\^\[\]`<>"]*)?
			# #hash (excluding "unsafe" chars from RFC 1738,
			# except for ~, which happens in practice)
		""".format(protocol_alternation, tld_alternation)

	flags = re.IGNORECASE | re.VERBOSE | re.UNICODE
	return re.compile(pattern, flags)
-												fix camas.unddit.com

											
										
										
											2022-07-15 13:27:45 +00:00
-												go back to the old TLD system in sanitize

											
										
										
											2022-11-02 07:08:02 +00:00
# URL pattern built once at import time: TLDs come from TLDS, and only the
# http and https protocols are listed.
url_re = build_url_re(tlds=TLDS, protocols=['http', 'https'])
-												mn

											
										
										
											2022-05-04 23:09:46 +00:00
 								def callback(attrs, new=False):
-												Fix <a> tag link injection, don't throw a ValueError when href attr is missing, properly count marseys (their usage wasn't counted for 20 days :marseygasp:) (#265)


											
										
										
											2022-05-17 18:59:07 +00:00
+									if (None, "href") not in attrs:
 										return # Incorrect <a> tag
-												mn

											
										
										
											2022-05-04 23:09:46 +00:00
+									href = attrs[(None, "href")]
-												Fix <a> tag link injection, don't throw a ValueError when href attr is missing, properly count marseys (their usage wasn't counted for 20 days :marseygasp:) (#265)


											
										
										
											2022-05-17 18:59:07 +00:00
+									# \ in href right after / makes most browsers ditch site hostname and allows for a host injection bypassing the check, see <a href="/\google.com">cool</a>
-												allow only ascii characters in links (https://rdrama.net/comment/2150032)

											
										
										
											2022-06-19 17:25:55 +00:00
+									if "\\" in href or not ascii_only_regex.fullmatch(href):
-												Fix <a> tag link injection, don't throw a ValueError when href attr is missing, properly count marseys (their usage wasn't counted for 20 days :marseygasp:) (#265)


											
										
										
											2022-05-17 18:59:07 +00:00
+										attrs["_text"] = href # Laugh at this user
 										del attrs[(None, "href")] # Make unclickable and reset harmful payload
 										return attrs
-												Revert "stop adding target="_blank" in the backend and move it to the frontend (to accomodate PWA users) - THANK YOU GEESE I LOVE YOU SO MUCH (#473)"

This reverts commit 88f3cd519d37e9473bb93239de4981efab688ed7.

											
										
										
											2022-11-21 17:37:38 +00:00
+									if not href.startswith('/') and not href.startswith(f'{SITE_FULL}/'):
 										attrs[(None, "target")] = "_blank"
-												remove redundant "noreferrer"

											
										
										
											2022-10-29 21:46:30 +00:00
+										attrs[(None, "rel")] = "nofollow noopener"
-												mn

											
										
										
											2022-05-04 23:09:46 +00:00
 									return attrs
-												minor sanitize refactor

											
										
										
											2022-09-16 16:30:34 +00:00
def render_emoji(html, regexp, golden, marseys_used, b=False):
	"""Replace emoji codes found in ``html`` with their ``<img>`` markup.

	html: text to process.
	regexp: compiled pattern; group(0) is the full emoji code as written and
		group(1) is the emoji name (may carry ``!`` / ``#`` modifiers).
	golden: when true, and the text has at most 20 emoji matches, marsey
		emojis get a small random chance of a ``g`` or ``glow`` attribute.
	marseys_used: set that receives the name of every emoji rendered.
	b: when true, every rendered image gets the ``b`` attribute.

	Returns the html with every recognised emoji code replaced. Codes whose
	image file does not exist (and are not a ``@user`` pat) are left as-is.
	"""
	emojis = list(regexp.finditer(html))
	captured = set()

	# Loop-invariant markup templates: {0}=code as typed, {1}=image URL, {2}=extra attributes.
	emoji_partial_pat = '<img loading="lazy" alt=":{0}:" src="{1}"{2}>'
	emoji_partial = '<img loading="lazy" data-bs-toggle="tooltip" alt=":{0}:" title=":{0}:" src="{1}"{2}>'

	for i in emojis:
		# Each distinct code is handled once; the substitution below is global.
		if i.group(0) in captured: continue
		captured.add(i.group(0))

		emoji = i.group(1).lower()
		attrs = ''
		if b: attrs += ' b'
		if golden and len(emojis) <= 20 and ('marsey' in emoji or emoji in marseys_const2):
			if random.random() < 0.0025: attrs += ' g'
			elif random.random() < 0.00125: attrs += ' glow'

		old = emoji # name as typed (modifiers kept), used for alt/title text
		emoji = emoji.replace('!','').replace('#','')
		if emoji == 'marseyrandom': emoji = random.choice(marseys_const2)

		emoji_html = None
		if emoji.endswith('pat') and emoji != 'marseyunpettablepat':
			# Strip only the trailing "pat" so the file that is checked is the
			# same one the image URL points at, even if the base name itself
			# contains "pat" (e.g. "marseypatriotpat").
			base = emoji[:-3]
			if path.isfile(f"files/assets/images/emojis/{base}.webp"):
				petted = emoji_partial_pat.format(old, f"/e/{base}.webp", attrs)
				emoji_html = f'<span data-bs-toggle="tooltip" alt=":{old}:" title=":{old}:"><img src="/i/hand.webp">{petted}</span>'
			elif emoji.startswith('@'):
				# ":@usernamepat:" pats a user's profile picture.
				if u := get_user(emoji[1:-3], graceful=True):
					petted = emoji_partial_pat.format(old, f"/pp/{u.id}", attrs)
					emoji_html = f'<span data-bs-toggle="tooltip" alt=":{old}:" title=":{old}:"><img src="/i/hand.webp">{petted}</span>'
		elif path.isfile(f'files/assets/images/emojis/{emoji}.webp'):
			emoji_html = emoji_partial.format(old, f'/e/{emoji}.webp', attrs)

		if emoji_html:
			marseys_used.add(emoji)
			# The code is matched literally (escaped), and the markup is inserted
			# verbatim (callable replacement, so no backslash-template handling).
			# The lookbehind skips codes that directly follow a double quote,
			# such as the alt/title values written by earlier replacements.
			html = re.sub(f'(?<!"){re.escape(i.group(0))}', lambda _match, _markup=emoji_html: _markup, html)

	return html
-												fix exceptions in sanitize leaving SIGALRM on

											
										
										
											2022-07-05 22:11:45 +00:00
def with_sigalrm_timeout(timeout: int):
	"""Decorator factory: abort the wrapped call if it runs longer than ``timeout`` seconds.

	Uses SIGALRM: an alarm is armed before the call and cancelled afterwards,
	whether the call returns or raises. If the alarm fires first, the handler
	prints "Timeout!" and raises ``Exception("Timeout")`` inside the running call.
	The SIGALRM handler that was installed before the call is put back afterwards.
	"""
	# while trying to test this using time.sleep I discovered that gunicorn does in fact do some
	# async so if we timeout on that (or on a db op) then the process is crashed without returning
	# a proper 500 error. Oh well.
	def sig_handler(signum, frame):
		print("Timeout!", flush=True)
		raise Exception("Timeout")

	def inner(func):
		@functools.wraps(func)
		def wrapped(*args, **kwargs):
			# signal.signal returns the handler it replaces; keep it so this
			# decorator does not leave its own handler installed.
			previous_handler = signal.signal(signal.SIGALRM, sig_handler)
			signal.alarm(timeout)
			try:
				return func(*args, **kwargs)
			finally:
				signal.alarm(0) # cancel the pending alarm first
				# None means the old handler was not installed from Python
				# and cannot be re-installed through signal.signal.
				if previous_handler is not None:
					signal.signal(signal.SIGALRM, previous_handler)
		return wrapped
	return inner
-												sanitize, fix bug with update_flag, and update copy for low tsfriends

											
										
										
											2022-11-07 00:40:51 +00:00
def sanitize_raw_title(sanitized:Optional[str]) -> str:
	"""Clean a raw title: drop invisible characters and line breaks, trim, truncate.

	Returns "" for None or an empty string. Otherwise removes U+200E, U+200B,
	U+FEFF, CR and LF, strips surrounding whitespace, and cuts the result to
	POST_TITLE_LENGTH_LIMIT characters.
	"""
	if not sanitized:
		return ""

	for unwanted in ('\u200e', '\u200b', "\ufeff", "\r", "\n"):
		sanitized = sanitized.replace(unwanted, '')

	trimmed = sanitized.strip()
	return trimmed[:POST_TITLE_LENGTH_LIMIT]
-												improve raw title sanitization and don't check the same thing like 5 times

											
										
										
											2022-10-05 08:04:32 +00:00
-												sanitize, fix bug with update_flag, and update copy for low tsfriends

											
										
										
											2022-11-07 00:40:51 +00:00
def sanitize_raw_body(sanitized:Optional[str], is_post:bool) -> str:
	"""Clean a raw post/comment body before it is processed further.

	Returns "" for None or an empty string. Otherwise removes whatever
	``html_comment_regex`` matches, drops U+200E, U+200B and U+FEFF, turns
	CRLF into LF, strips surrounding whitespace, and truncates to
	POST_BODY_LENGTH_LIMIT (``is_post`` true) or COMMENT_BODY_LENGTH_LIMIT.
	"""
	if not sanitized:
		return ""

	cleaned = html_comment_regex.sub('', sanitized)

	# Order matters: invisible characters go first, so a CR and LF separated
	# only by one of them still collapse into a single LF.
	replacements = (('\u200e', ''), ('\u200b', ''), ("\ufeff", ""), ("\r\n", "\n"))
	for needle, substitute in replacements:
		cleaned = cleaned.replace(needle, substitute)

	cleaned = cleaned.strip()

	if is_post:
		limit = POST_BODY_LENGTH_LIMIT
	else:
		limit = COMMENT_BODY_LENGTH_LIMIT
	return cleaned[:limit]
-												sanitize raw bodies

											
										
										
											2022-10-05 08:16:56 +00:00
-												improve raw title sanitization and don't check the same thing like 5 times

											
										
										
											2022-10-05 08:04:32 +00:00
-												sanitize, fix bug with update_flag, and update copy for low tsfriends

											
										
										
											2022-11-07 00:40:51 +00:00
def sanitize_settings_text(sanitized:Optional[str], max_length:Optional[int]=None) -> str:
	"""Clean a single-line settings value.

	Returns "" for None or an empty string. Otherwise removes U+200E, U+200B,
	U+FEFF, CR and LF, strips surrounding whitespace and, when ``max_length``
	is truthy, truncates to that many characters.
	"""
	if not sanitized:
		return ""

	# Mapping a code point to None makes str.translate delete it.
	deletions = dict.fromkeys(map(ord, '\u200e\u200b\ufeff\r\n'))
	cleaned = sanitized.translate(deletions).strip()

	return cleaned[:max_length] if max_length else cleaned
-												increase sanitize timeout limit from 2 seconds to 5 seconds

											
										
										
											2022-09-29 10:34:09 +00:00
@with_sigalrm_timeout(5)
def sanitize(sanitized, golden=True, limit_pings=0, showmore=True, count_marseys=False, torture=False):
	"""Render user-supplied markdown into sanitized HTML.

	Parameters:
		sanitized: the raw text to render.
		golden: forwarded to `render_emoji`; forced to False when more than
			20 emoji matches are found.
		limit_pings: when truthy, the maximum number of distinct mentioned
			names allowed unless the current user's `admin_level` reaches
			`PERMS['POST_COMMENT_INFINITE_PINGS']`.
		showmore: when true and the output exceeds 3500 characters, apply
			`showmore_regex` once to insert a "SHOW MORE" button.
		count_marseys: when true, increment `count` on every `Marsey` row
			(with no submitter) whose name was used.
		torture: when true, pass the text through `torture_ap` with
			`g.v.username` and append a random emoji.

	Returns the sanitized HTML string, stripped of surrounding whitespace.

	Aborts with 406 when the ping limit is exceeded and with 403 when a link
	matches a `BannedDomain`.
	"""
	sanitized = sanitized.strip()

	sanitized = utm_regex.sub('', sanitized)
	sanitized = utm_regex2.sub('', sanitized)

	if torture:
		sanitized = torture_ap(sanitized, g.v.username)
		emoji = random.choice(['trumpjaktalking', 'reposthorse'])
		sanitized += f'\n:#{emoji}:'

	sanitized = normalize_url(sanitized)

	# Leave line breaks alone when the text contains a code block.
	if '```' not in sanitized and '<pre>' not in sanitized:
		sanitized = linefeeds_regex.sub(r'\1\n\n\2', sanitized)

	sanitized = greentext_regex.sub(r'\1<g>\>\2</g>', sanitized)

	sanitized = image_regex.sub(r'\1![](\2)\5', sanitized)

	sanitized = image_check_regex.sub(r'\1', sanitized)

	sanitized = link_fix_regex.sub(r'\1https://\2', sanitized)

	if FEATURES['MARKUP_COMMANDS']:
		sanitized = command_regex.sub(command_regex_matcher, sanitized)

	sanitized = markdown(sanitized)

	sanitized = strikethrough_regex.sub(r'\1<del>\2</del>', sanitized)

	# replacing zero width characters, overlines, fake colons
	sanitized = sanitized.replace('\u200e','').replace('\u200b','').replace("\ufeff", "").replace("\u033f","").replace("\u0589", ":")

	sanitized = reddit_regex.sub(r'\1<a href="https://old.reddit.com/\2" rel="nofollow noopener" target="_blank">/\2</a>', sanitized)
	sanitized = sub_regex.sub(r'\1<a href="/\2">/\2</a>', sanitized)

	# --- @-mentions ---
	v = getattr(g, 'v', None)

	names = {m.group(2) for m in mention_regex.finditer(sanitized)}
	# `v` may be None (it is fetched with a default above), so guard the
	# attribute access: a missing user never bypasses the ping limit.
	can_ping_unlimited = bool(v) and v.admin_level >= PERMS['POST_COMMENT_INFINITE_PINGS']
	if limit_pings and len(names) > limit_pings and not can_ping_unlimited: abort(406)

	users_list = get_users(names, graceful=True)
	# Index users by both current and original username, lowercased.
	users_dict = {}
	for u in users_list:
		users_dict[u.username.lower()] = u
		if u.original_username:
			users_dict[u.original_username.lower()] = u

	def replacer(m):
		u = users_dict.get(m.group(2).lower())
		if not u:
			# Unknown name: leave the text untouched.
			return m.group(0)
		return f'{m.group(1)}<a href="/id/{u.id}"><img loading="lazy" src="/pp/{u.id}">@{u.username}</a>'

	sanitized = mention_regex.sub(replacer, sanitized)

	# --- images and anchors ---
	soup = BeautifulSoup(sanitized, 'lxml')

	for tag in soup.find_all("img"):
		# Profile pictures inserted by the mention replacer are left alone.
		if tag.get("src") and not tag["src"].startswith('/pp/'):
			if not is_safe_url(tag["src"]):
				# Replace the image with a plain link to its source.
				a = soup.new_tag("a", href=tag["src"], rel="nofollow noopener", target="_blank")
				a.string = tag["src"]
				tag.replace_with(a)
				continue

			# Swap the real source for a placeholder; the real URL is kept in data-src.
			tag["loading"] = "lazy"
			tag["data-src"] = tag["src"]
			tag["src"] = "/i/l.webp"
			tag['alt'] = f'![]({tag["data-src"]})'

			if tag.parent.name != 'a':
				# Wrap bare images in a link to the original image.
				a = soup.new_tag("a", href=tag["data-src"])
				if not is_site_url(a["href"]):
					a["rel"] = "nofollow noopener"
					a["target"] = "_blank"
				tag = tag.replace_with(a)
				a.append(tag)

	for tag in soup.find_all("a"):
		if not tag.contents or not str(tag.contents[0]).strip():
			# Remove empty anchors; nothing further to do with a detached tag.
			tag.extract()
			continue
		if tag.get("href") and fishylinks_regex.fullmatch(str(tag.string)):
			# Link text matched by fishylinks_regex is replaced with the real target.
			tag.string = tag["href"]

	sanitized = str(soup)

	sanitized = spoiler_regex.sub(r'<spoiler>\1</spoiler>', sanitized)

	# --- emojis ---
	marseys_used = set()

	emojis = list(emoji_regex.finditer(sanitized))
	if len(emojis) > 20: golden = False

	seen = set()
	for i in emojis:
		old = i.group(0)
		if old in seen: continue
		seen.add(old)

		new = old.lower()
		if any(name in old for name in ('marseylong1', 'marseylong2', 'marseyllama1', 'marseyllama2')):
			new = new.replace(">", " class='mb-0'>")

		new = render_emoji(new, emoji_regex2, golden, marseys_used, True)

		sanitized = sanitized.replace(old, new)

	emojis = list(emoji_regex2.finditer(sanitized))
	if len(emojis) > 20: golden = False

	sanitized = render_emoji(sanitized, emoji_regex2, golden, marseys_used)

	sanitized = sanitized.replace('&amp;','&')

	# --- youtube embeds ---
	seen = set()
	for i in youtube_regex.finditer(sanitized):
		if i.group(0) in seen: continue
		seen.add(i.group(0))

		params = parse_qs(urlparse(i.group(2)).query, keep_blank_values=True)
		t = params.get('t', params.get('start', [0]))[0]
		if isinstance(t, str): t = t.replace('s','')

		htmlsource = f'{i.group(1)}<lite-youtube videoid="{i.group(3)}" params="autoplay=1&modestbranding=1'
		if t:
			# A non-numeric timestamp is ignored. Only ValueError is caught so
			# that unrelated exceptions (e.g. KeyboardInterrupt, or anything
			# raised by the timeout decorator -- presumably signal-driven,
			# verify) are not silently swallowed.
			try: htmlsource += f'&start={int(t)}'
			except ValueError: pass
		htmlsource += '"></lite-youtube>'

		sanitized = sanitized.replace(i.group(0), htmlsource)

	sanitized = video_sub_regex.sub(r'\1<video controls preload="none" src="\2"></video>', sanitized)
	sanitized = audio_sub_regex.sub(r'\1<audio controls preload="none" src="\2"></audio>', sanitized)

	if count_marseys:
		for marsey in g.db.query(Marsey).filter(Marsey.submitter_id==None, Marsey.name.in_(marseys_used)).all():
			marsey.count += 1
			g.db.add(marsey)

	sanitized = sanitized.replace('<p></p>', '')
	# Drop the document wrapper that shows up after serializing the lxml soup.
	sanitized = sanitized.replace('<html><body>','').replace('</body></html>','')

	# --- whitelist-based HTML cleaning ---
	css_sanitizer = CSSSanitizer(allowed_css_properties=allowed_styles)
	sanitized = bleach.Cleaner(tags=allowed_tags,
								attributes=allowed_attributes,
								protocols=['http', 'https'],
								css_sanitizer=css_sanitizer,
								filters=[partial(LinkifyFilter, skip_tags=["pre"],
									parse_email=False, callbacks=[callback], url_re=url_re)]
								).clean(sanitized)

	# --- banned domains ---
	soup = BeautifulSoup(sanitized, 'lxml')

	links = soup.find_all("a")

	domain_list = set()

	for link in links:
		href = link.get("href")
		if not href: continue
		url = urlparse(href)
		# Registered domain (subdomains ignored) plus path, compared case-insensitively.
		d = tldextract.extract(href).registered_domain + url.path
		domain_list.add(d.lower())

	banned_domains = g.db.query(BannedDomain).all()
	for x in banned_domains:
		for y in domain_list:
			if y.startswith(x.domain):
				abort(403, description=f'Remove the banned link "{x.domain}" and try again!\nReason for link ban: "{x.reason}"')

	# Newlines are only kept when the output contains a <pre> block.
	if '<pre>' not in sanitized:
		sanitized = sanitized.replace('\n','')

	if showmore and len(sanitized) > 3500:
		sanitized = showmore_regex.sub(r'\1<p><button class="showmore" onclick="showmore()">SHOW MORE</button></p><d class="d-none">\2</d>', sanitized, count=1)

	return sanitized.strip()
-												mn

											
										
										
											2022-05-04 23:09:46 +00:00
 								def allowed_attributes_emojis(tag, name, value):
 									if tag == 'img':
-												crgd is a king

											
										
										
											2022-05-25 18:29:22 +00:00
+										if name == 'src' and value.startswith('/') and '\\' not in value: return True
-												mn

											
										
										
											2022-05-04 23:09:46 +00:00
+										if name == 'loading' and value == 'lazy': return True
 										if name == 'data-bs-toggle' and value == 'tooltip': return True
-												add glowie marseys

											
										
										
											2022-06-23 00:34:37 +00:00
+										if name in ['g','glow'] and not value: return True
-												fc

											
										
										
											2022-05-18 18:45:04 +00:00
+										if name in ['alt','title']: return True
-												pls review

											
										
										
											2022-05-17 19:58:41 +00:00
 									if tag == 'span':
 										if name == 'data-bs-toggle' and value == 'tooltip': return True
 										if name == 'title': return True
 										if name == 'alt': return True
 										return False
-												mn

											
										
										
											2022-05-04 23:09:46 +00:00
+									return False
-												fix exceptions in sanitize leaving SIGALRM on

											
										
										
											2022-07-05 22:11:45 +00:00
@with_sigalrm_timeout(1)
def filter_emojis_only(title, golden=True, count_marseys=False, graceful=False, torture=False):
	"""Render a title-like string: HTML-escaped text with emojis and strikethrough only.

	Parameters:
		title: the raw text.
		golden: forwarded to `render_emoji`.
		count_marseys: when true, increment `count` on every `Marsey` row
			(with no submitter) whose name was used.
		graceful: when true, an over-long result is returned instead of
			aborting.
		torture: when true, pass the text through `torture_ap` with
			`g.v.username`.

	Returns the rendered HTML. Aborts with 400 when the result is longer than
	`POST_TITLE_HTML_LENGTH_LIMIT` and `graceful` is false.
	"""
	title = title.strip()

	if torture:
		title = torture_ap(title, g.v.username)

	# Remove invisible/oversized characters and all line breaks and tabs.
	# The zero-width characters are written as explicit escapes: raw invisible
	# literals are easily lost by editors, which silently turns the call into
	# a no-op replace('', '').
	for unwanted in ('\u200e', '\u200b', "\ufeff", "𒐪", "\n", "\r", "\t"):
		title = title.replace(unwanted, '')

	# Escape HTML. '&' must go first so the entities added afterwards are not
	# escaped a second time.
	title = title.replace("&", "&amp;").replace('<','&lt;').replace('>','&gt;').replace('"', '&quot;').replace("'", "&#039;").strip()

	marseys_used = set()

	title = render_emoji(title, emoji_regex3, golden, marseys_used)

	if count_marseys:
		for marsey in g.db.query(Marsey).filter(Marsey.submitter_id==None, Marsey.name.in_(marseys_used)).all():
			marsey.count += 1
			g.db.add(marsey)

	title = strikethrough_regex.sub(r'\1<del>\2</del>', title)

	# Only the tags produced above are allowed through.
	title = bleach.clean(title, tags=['img','del','span'], attributes=allowed_attributes_emojis, protocols=['http','https']).replace('\n','').strip()

	if len(title) > POST_TITLE_HTML_LENGTH_LIMIT and not graceful: abort(400)
	return title
-												Sanitize: modularize normalize_url, fix streamable.

Originally prompted by https://rdrama.net/post/18459/-/1984609 which
noticed that streamable.com/e/ links as posts would have another e/
added to them. This was in spite of logic in posts.py api_is_repost
and submit_post designed to specifically counteract this.
Proximal cause was a copypasta'd url.replace(...) chain which
caused the mistake before the streamable-specific logic had a chance
to avoid making it.

Solution: remove the streamable replacement from the chained statement
and create `helpers.normalize_url(url)` to get rid of the copypasta.

											
										
										
											2022-05-25 08:43:16 +00:00
-												re-refactor normalize_url

											
										
										
											2022-06-10 20:02:15 +00:00
+								def normalize_url(url):
-												fix reddit domain replacement

											
										
										
											2022-07-04 03:08:33 +00:00
+									url = reddit_domain_regex.sub(r'\1https://old.reddit.com/\3/', url)
-												re-refactor normalize_url

											
										
										
											2022-06-10 20:02:15 +00:00
-												refactor normalizing urls at runtime (I put the function in comment.py cuz there were weird import errors that i didnt wanna fix)

											
										
										
											2022-06-23 15:47:57 +00:00
+									url = url.replace("https://youtu.be/", "https://youtube.com/watch?v=") \
-												Sanitize: modularize normalize_url, fix streamable.

Originally prompted by https://rdrama.net/post/18459/-/1984609 which
noticed that streamable.com/e/ links as posts would have another e/
added to them. This was in spite of logic in posts.py api_is_repost
and submit_post designed to specifically counteract this.
Proximal cause was a copypasta'd url.replace(...) chain which
caused the mistake before the streamable-specific logic had a chance
to avoid making it.

Solution: remove the streamable replacement from the chained statement
and create `helpers.normalize_url(url)` to get rid of the copypasta.

											
										
										
											2022-05-25 08:43:16 +00:00
+											 .replace("https://music.youtube.com/watch?v=", "https://youtube.com/watch?v=") \
-												fix youtube embedding

											
										
										
											2022-08-24 22:02:06 +00:00
+											 .replace("https://www.youtube.com", "https://youtube.com") \
-												Sanitize: modularize normalize_url, fix streamable.

Originally prompted by https://rdrama.net/post/18459/-/1984609 which
noticed that streamable.com/e/ links as posts would have another e/
added to them. This was in spite of logic in posts.py api_is_repost
and submit_post designed to specifically counteract this.
Proximal cause was a copypasta'd url.replace(...) chain which
caused the mistake before the streamable-specific logic had a chance
to avoid making it.

Solution: remove the streamable replacement from the chained statement
and create `helpers.normalize_url(url)` to get rid of the copypasta.

											
										
										
											2022-05-25 08:43:16 +00:00
+											 .replace("https://youtube.com/shorts/", "https://youtube.com/watch?v=") \
-												fix youtube embedding

											
										
										
											2022-08-24 22:02:06 +00:00
+											 .replace("https://youtube.com/v/", "https://youtube.com/watch?v=") \
-												refactor normalizing urls at runtime (I put the function in comment.py cuz there were weird import errors that i didnt wanna fix)

											
										
										
											2022-06-23 15:47:57 +00:00
+											 .replace("https://mobile.twitter.com", "https://twitter.com") \
 											 .replace("https://m.facebook.com", "https://facebook.com") \
 											 .replace("https://m.wikipedia.org", "https://wikipedia.org") \
 											 .replace("https://m.youtube.com", "https://youtube.com") \
 											 .replace("https://www.twitter.com", "https://twitter.com") \
 											 .replace("https://www.instagram.com", "https://instagram.com") \
 											 .replace("https://www.tiktok.com", "https://tiktok.com") \
 											 .replace("https://www.streamable.com", "https://streamable.com") \
-												replace streamable links with full-size version

											
										
										
											2022-06-10 14:35:09 +00:00
+											 .replace("https://streamable.com/", "https://streamable.com/e/") \
-												replace search.marsey.cat with camas.unddit.com

											
										
										
											2022-07-15 13:00:51 +00:00
+											 .replace("https://streamable.com/e/e/", "https://streamable.com/e/") \
-												replace "https://imgur.com/" with "https://i.imgur.com/" for the sake of mobilecels

											
										
										
											2022-08-13 05:06:53 +00:00
+											 .replace("https://search.marsey.cat/#", "https://camas.unddit.com/#") \
-												nitter shit

											
										
										
											2022-09-29 05:36:10 +00:00
+											 .replace("https://imgur.com/", "https://i.imgur.com/") \
 											 .replace("https://nitter.net/", "https://twitter.com/") \
 											 .replace("https://nitter.42l.fr/", "https://twitter.com/") \
 											 .replace("https://nitter.lacontrevoie.fr/", "https://twitter.com/")
-												Sanitize: modularize normalize_url, fix streamable.

Originally prompted by https://rdrama.net/post/18459/-/1984609 which
noticed that streamable.com/e/ links as posts would have another e/
added to them. This was in spite of logic in posts.py api_is_repost
and submit_post designed to specifically counteract this.
Proximal cause was a copypasta'd url.replace(...) chain which
caused the mistake before the streamable-specific logic had a chance
to avoid making it.

Solution: remove the streamable replacement from the chained statement
and create `helpers.normalize_url(url)` to get rid of the copypasta.

											
										
										
											2022-05-25 08:43:16 +00:00
-												constantify image extensions and fix imgur .gif bug

											
										
										
											2022-11-05 21:01:23 +00:00
+									url = imgur_regex.sub(r'\1_d.webp?maxwidth=9999&fidelity=grand', url)
-												small image embed improvements

											
										
										
											2022-06-11 12:21:59 +00:00
+									url = giphy_regex.sub(r'\1.webp', url)
-												Sanitize: modularize normalize_url, fix streamable.

Originally prompted by https://rdrama.net/post/18459/-/1984609 which
noticed that streamable.com/e/ links as posts would have another e/
added to them. This was in spite of logic in posts.py api_is_repost
and submit_post designed to specifically counteract this.
Proximal cause was a copypasta'd url.replace(...) chain which
caused the mistake before the streamable-specific logic had a chance
to avoid making it.

Solution: remove the streamable replacement from the chained statement
and create `helpers.normalize_url(url)` to get rid of the copypasta.

											
										
										
											2022-05-25 08:43:16 +00:00
-												remove weird trailing tabs

											
										
										
											2022-06-11 09:56:16 +00:00
+									return url
-												Check URI approved embed in all CSS contexts.

											
										
										
											2022-08-05 17:09:41 +00:00
 								def validate_css(css):
 									"""Check a CSS string for disallowed constructs.
 									Returns a ``(ok, message)`` tuple: ``(True, "")`` when the CSS
 									passes, otherwise ``(False, reason)`` with a human-readable reason.
 									Two things are rejected:
 									* any ``@import`` statement, in any letter case;
 									* any URL matched by ``css_url_regex`` (capture group 1) for which
 									  ``is_safe_url`` returns a falsy value.
 									"""
 									# CSS at-rule keywords are ASCII case-insensitive, so "@IMPORT" or
 									# "@Import" must be rejected exactly like "@import". Only the check
 									# is lower-cased; the URL scan below still sees the original text.
 									if '@import' in css.lower():
 										return False, "@import statements are not allowed!"
 									for match in css_url_regex.finditer(css):
 										url = match.group(1)
 										if not is_safe_url(url):
 											# Name the offending registered domain in the error message.
 											domain = tldextract.extract(url).registered_domain
 											return False, f"The domain '{domain}' is not allowed, please use one of these domains\n\n{approved_embed_hosts}."
 									return True, ""