MarseyWorld/files/helpers/sanitize.py

import functools
import random
import re
import signal
from functools import partial
from os import path, listdir
from typing import Any
from urllib.parse import parse_qs, urlparse, unquote

import bleach
from bleach.css_sanitizer import CSSSanitizer
from bleach.linkifier import LinkifyFilter
from bs4 import BeautifulSoup
from mistletoe import markdown

from files.classes.domains import BannedDomain
from files.classes.mod_logs import ModAction
from files.classes.notifications import Notification
from files.classes.group import Group

from files.helpers.config.const import *
from files.helpers.const_stateful import *
from files.helpers.regex import *
from files.helpers.get import *

TLDS = ( # Original gTLDs and ccTLDs
	'ac','ad','ae','aero','af','ag','ai','al','am','an','ao','aq','ar','arpa','as','asia','at',
	'au','aw','ax','az','ba','bb','bd','be','bf','bg','bh','bi','biz','bj','bm','bn','bo','br',
	'bs','bt','bv','bw','by','bz','ca','cafe','cat','cc','cd','cf','cg','ch','ci','ck','cl',
	'cm','cn','co','com','coop','cr','cu','cv','cx','cy','cz','de','dj','dk','dm','do','dz','ec',
	'edu','ee','eg','er','es','et','eu','fi','fj','fk','fm','fo','fr','ga','gb','gd','ge','gf',
	'gg','gh','gi','gl','gm','gn','gov','gp','gq','gr','gs','gt','gu','gw','gy','hk','hm','hn',
	'hr','ht','hu','id','ie','il','im','in','info','int','io','iq','ir','is','it','je','jm','jo',
	'jobs','jp','ke','kg','kh','ki','km','kn','kp','kr','kw','ky','kz','la','lb','lc','li','lk',
	'lr','ls','lt','lu','lv','ly','ma','mc','md','me','mg','mh','mil','mk','ml','mm','mn','mo',
	'mobi','mp','mq','mr','ms','mt','mu','museum','mv','mw','mx','my','mz','na','name',
	'nc','ne','net','nf','ng','ni','nl','no','np','nr','nu','nz','om','org','pa','pe','pf','pg',
	'ph','pk','pl','pm','pn','post','pr','pro','ps','pt','pw','py','qa','re','ro','rs','ru','rw',
	'sa','sb','sc','sd','se','sg','sh','si','sj','sk','sl','sm','sn','so','social','sr','ss','st',
	'su','sv','sx','sy','sz','tc','td','tel','tf','tg','th','tj','tk','tl','tm','tn','to','tp',
	'tr','travel','tt','tv','tw','tz','ua','ug','uk','us','uy','uz','va','vc','ve','vg','vi','vn',
	'vu','wf','ws','xn','xxx','ye','yt','yu','za','zm','zw',
	# New gTLDs
	'app','cleaning','club','dev','farm','florist','fun','gay','lgbt','life','lol',
	'moe','mom','monster','new','news','online','pics','press','pub','site','blog',
	'vip','win','world','wtf','xyz','video','host','art','media','wiki','tech',
	'cooking','network','party','goog','markets',
	)

allowed_tags = ('b','blockquote','br','code','del','em','h1','h2','h3','h4','h5','h6','hr','i',
	'li','ol','p','pre','strong','sub','sup','table','tbody','th','thead','td','tr','ul',
	'marquee','a','span','ruby','rp','rt','spoiler','img','lite-youtube','video','audio','g','u')

allowed_styles = ['color', 'background-color', 'font-weight', 'text-align']

def allowed_attributes(tag, name, value):

	if name == 'style': return True

	if tag == 'marquee':
		if name in {'direction', 'behavior', 'scrollamount'}: return True
		if name in {'height', 'width'}:
			try: value = int(value.replace('px', ''))
			except: return False
			if 0 < value <= 250: return True

	if tag == 'a':
		if name == 'href' and '\\' not in value and 'xn--' not in value:
			return True
		if name == 'rel' and value == 'nofollow noopener': return True
		if name == 'target' and value == '_blank': return True

	if tag == 'img':
		if name in {'src','data-src'}: return is_safe_url(value)
		if name == 'loading' and value == 'lazy': return True
		if name == 'data-bs-toggle' and value == 'tooltip': return True
		if name in {'g','b','glow'} and not value: return True
		if name in {'alt','title'}: return True
		if name == 'class' and value == 'img': return True

	if tag == 'lite-youtube':
		if name == 'params' and value.startswith('autoplay=1&modestbranding=1'): return True
		if name == 'videoid': return True

	if tag == 'video':
		if name == 'controls' and value == '': return True
		if name == 'preload' and value == 'none': return True
		if name == 'src': return is_safe_url(value)

	if tag == 'audio':
		if name == 'src': return is_safe_url(value)
		if name == 'controls' and value == '': return True
		if name == 'preload' and value == 'none': return True

	if tag == 'p':
		if name == 'class' and value in {'mb-0','resizable'}: return True

	if tag == 'span':
		if name == 'data-bs-toggle' and value == 'tooltip': return True
		if name == 'title': return True
		if name == 'alt': return True

	if tag == 'table':
		if name == 'class' and value == 'table': return True

	return False

def build_url_re(tlds, protocols):
	"""Builds the url regex used by linkifier

	If you want a different set of tlds or allowed protocols, pass those in
	and stomp on the existing ``url_re``::

		from bleach import linkifier

		my_url_re = linkifier.build_url_re(my_tlds_list, my_protocols)

		linker = LinkifyFilter(url_re=my_url_re)

	"""
	return re.compile(
		r"""\(*# Match any opening parentheses.
		\b(?<![@.])(?:(?:{0}):/{{0,3}}(?:(?:\w+:)?\w+@)?)?# http://
		([\w-]+\.)+(?:{1})(?:\:[0-9]+)?(?!\.\w)\b# xx.yy.tld(:##)?
		(?:[/?][^#\s\{{\}}\|\\\^\[\]`<>"]*)?
			# /path/zz (excluding "unsafe" chars from RFC 1738,
			# except for ~, which happens in practice)
		(?:\#[^#\s\|\\\^\[\]`<>"]*)?
			# #hash (excluding "unsafe" chars from RFC 1738,
			# except for ~, which happens in practice)
		""".format(
			"|".join(sorted(protocols)), "|".join(sorted(tlds))
		),
		re.VERBOSE | re.UNICODE,
	)

url_re = build_url_re(tlds=TLDS, protocols=['http', 'https'])

def create_comment_duplicated(text_html):
	new_comment = Comment(author_id=AUTOJANNY_ID,
							parent_submission=None,
							body_html=text_html,
							distinguish_level=6,
							is_bot=True)
	g.db.add(new_comment)
	g.db.flush()

	new_comment.top_comment_id = new_comment.id

	return new_comment.id

def send_repeatable_notification_duplicated(uid, text):

	if uid in BOT_IDs: return

	text_html = sanitize(text)

	existing_comments = g.db.query(Comment.id).filter_by(author_id=AUTOJANNY_ID, parent_submission=None, body_html=text_html, is_bot=True).order_by(Comment.id).all()

	for c in existing_comments:
		existing_notif = g.db.query(Notification.user_id).filter_by(user_id=uid, comment_id=c.id).one_or_none()
		if not existing_notif:
			notif = Notification(comment_id=c.id, user_id=uid)
			g.db.add(notif)
			return

	cid = create_comment_duplicated(text_html)
	notif = Notification(comment_id=cid, user_id=uid)
	g.db.add(notif)


def execute_blackjack(v, target, body, type):
	if not blackjack or not body: return False

	execute = False
	for x in blackjack.split(','):
		if all(i in body.lower() for i in x.split()):
			execute = True

	if not execute: return False

	v.shadowbanned = AUTOJANNY_ID

	ma = ModAction(
		kind="shadowban",
		user_id=AUTOJANNY_ID,
		target_user_id=v.id,
		_note='reason: "Blackjack"'
	)
	g.db.add(ma)

	v.ban_reason = "Blackjack"
	g.db.add(v)

	notified_ids = [x[0] for x in g.db.query(User.id).filter(User.admin_level >= PERMS['BLACKJACK_NOTIFICATIONS'])]
	extra_info = type

	if target:
		if type == 'submission':
			extra_info = target.permalink
		elif type == 'flag':
			extra_info = f"reports on {target.permalink}"
		elif type in {'comment', 'message'}:
			for id in notified_ids:
				n = Notification(comment_id=target.id, user_id=id)
				g.db.add(n)

			extra_info = None

	if extra_info:
		for id in notified_ids:
			send_repeatable_notification_duplicated(id, f"Blackjack by @{v.username}: {extra_info}")
	return True

def render_emoji(html, regexp, golden, emojis_used, b=False):
	emojis = list(regexp.finditer(html))
	captured = set()

	for i in emojis:
		if i.group(0) in captured: continue
		captured.add(i.group(0))

		emoji = i.group(1).lower()
		attrs = ''
		if b: attrs += ' b'
		if golden and len(emojis) <= 20 and ('marsey' in emoji or emoji in marseys_const2):
			if random.random() < 0.0025: attrs += ' g'
			elif random.random() < 0.00125: attrs += ' glow'

		old = emoji
		emoji = emoji.replace('!','').replace('#','')
		if emoji == 'marseyrandom': emoji = random.choice(marseys_const2)

		emoji_partial_pat = '<img loading="lazy" alt=":{0}:" src="{1}"{2}>'
		emoji_partial = '<img loading="lazy" data-bs-toggle="tooltip" alt=":{0}:" title=":{0}:" src="{1}"{2}>'
		emoji_html = None

		if emoji.endswith('pat') and emoji != 'marseyunpettablepat':
			if path.isfile(f"files/assets/images/emojis/{emoji.replace('pat','')}.webp"):
				emoji_html = f'<span data-bs-toggle="tooltip" alt=":{old}:" title=":{old}:"><img loading="lazy" src="/i/hand.webp">{emoji_partial_pat.format(old, f"/e/{emoji[:-3]}.webp", attrs)}</span>'
			elif emoji.startswith('@'):
				if u := get_user(emoji[1:-3], graceful=True):
					emoji_html = f'<span data-bs-toggle="tooltip" alt=":{old}:" title=":{old}:"><img loading="lazy" src="/i/hand.webp">{emoji_partial_pat.format(old, f"/pp/{u.id}", attrs)}</span>'
		elif path.isfile(f'files/assets/images/emojis/{emoji}.webp'):
			emoji_html = emoji_partial.format(old, f'/e/{emoji}.webp', attrs)


		if emoji_html:
			emojis_used.add(emoji)
			html = re.sub(f'(?<!"){i.group(0)}(?![^<]*<\/(code|pre|a)>)', emoji_html, html)
	return html


def with_sigalrm_timeout(timeout: int):
	'Use SIGALRM to raise an exception if the function executes for longer than timeout seconds'

	# while trying to test this using time.sleep I discovered that gunicorn does in fact do some
	# async so if we timeout on that (or on a db op) then the process is crashed without returning
	# a proper 500 error. Oh well.
	def sig_handler(signum, frame):
		print("Timeout!", flush=True)
		raise Exception("Timeout")

	def inner(func):
		@functools.wraps(func)
		def wrapped(*args, **kwargs):
			signal.signal(signal.SIGALRM, sig_handler)
			signal.alarm(timeout)
			try:
				return func(*args, **kwargs)
			finally:
				signal.alarm(0)
		return wrapped
	return inner


def sanitize_raw_title(sanitized:Optional[str]) -> str:
	if not sanitized: return ""
	sanitized = sanitized.replace('\u200e','').replace('\u200b','').replace("\ufeff", "").replace("\r","").replace("\n", "").replace("𒐫", "").replace('\u202e','')
	sanitized = sanitized.strip()
	return sanitized[:POST_TITLE_LENGTH_LIMIT]

def sanitize_raw_body(sanitized:Optional[str], is_post:bool) -> str:
	if not sanitized: return ""
	sanitized = html_comment_regex.sub('', sanitized)

	sanitized = sanitized.replace('\u200e','').replace('\u200b','').replace("\ufeff", "").replace("\r\n", "\n").replace("𒐫", "").replace('\u202e','')
	sanitized = sanitized.strip()
	return sanitized[:POST_BODY_LENGTH_LIMIT(g.v) if is_post else COMMENT_BODY_LENGTH_LIMIT]


def sanitize_settings_text(sanitized:Optional[str], max_length:Optional[int]=None) -> str:
	if not sanitized: return ""
	sanitized = sanitized.replace('\u200e','').replace('\u200b','').replace("\ufeff", "").replace("\r", "").replace("\n","")
	sanitized = sanitized.strip()
	if max_length: sanitized = sanitized[:max_length]
	return sanitized


def handle_youtube_links(url):
	html = None
	params = parse_qs(urlparse(url).query, keep_blank_values=True)

	id = params.get('v')

	if not id: return None

	id = id[0]

	t = None
	split = id.split('?t=')
	if len(split) == 2:
		id = split[0]
		t = split[1]

	id = id.split('?')[0]

	if yt_id_regex.fullmatch(id):
		if not t:
			t = params.get('t', params.get('start', [0]))[0]
		if isinstance(t, str):
			t = t.replace('s','').replace('S','')
			split = t.split('m')
			if len(split) == 2:
				minutes = int(split[0])
				seconds = int(split[1])
				t = minutes*60 + seconds
		html = f'<lite-youtube videoid="{id}" params="autoplay=1&modestbranding=1'
		if t:
			html += f'&start={int(t)}'
		html += '"></lite-youtube>'
	return html

@with_sigalrm_timeout(10)
def sanitize(sanitized, golden=True, limit_pings=0, showmore=True, count_emojis=False, snappy=False, chat=False, blackjack=None):
	sanitized = sanitized.strip()
	if not sanitized: return ''

	if "style" in sanitized and "filter" in sanitized:
		if sanitized.count("blur(") + sanitized.count("drop-shadow(") > 5:
			abort(400, "Too many filters!")

	if blackjack and execute_blackjack(g.v, None, sanitized, blackjack):
		sanitized = 'g'

	sanitized = utm_regex.sub('', sanitized)
	sanitized = utm_regex2.sub('', sanitized)

	sanitized = normalize_url(sanitized)

	if '```' not in sanitized and '<pre>' not in sanitized:
		sanitized = linefeeds_regex.sub(r'\1\n\n\2', sanitized)

	sanitized = greentext_regex.sub(r'\1<g>\>\2</g>', sanitized)
	sanitized = image_regex.sub(r'\1![](\2)', sanitized)
	sanitized = image_check_regex.sub(r'\1', sanitized)
	sanitized = link_fix_regex.sub(r'\1https://\2', sanitized)

	if FEATURES['MARKUP_COMMANDS']:
		sanitized = command_regex.sub(command_regex_matcher, sanitized)

	sanitized = numbered_list_regex.sub(r'\1\. ', sanitized)

	sanitized = strikethrough_regex.sub(r'\1<del>\2</del>', sanitized)

	sanitized = markdown(sanitized)

	# replacing zero width characters, overlines, fake colons
	sanitized = sanitized.replace('\u200e','').replace('\u200b','').replace("\ufeff", "").replace("\u033f","").replace("\u0589", ":")

	sanitized = reddit_regex.sub(r'\1<a href="https://old.reddit.com/\2" rel="nofollow noopener" target="_blank">/\2</a>', sanitized)
	sanitized = sub_regex.sub(r'\1<a href="/\2">/\2</a>', sanitized)

	v = getattr(g, 'v', None)

	names = set(m.group(1) for m in mention_regex.finditer(sanitized))
	if limit_pings and len(names) > limit_pings and not v.admin_level >= PERMS['POST_COMMENT_INFINITE_PINGS']: abort(406)
	users_list = get_users(names, graceful=True)
	users_dict = {}
	for u in users_list:
		users_dict[u.username.lower()] = u
		if u.original_username:
			users_dict[u.original_username.lower()] = u
		if u.prelock_username:
			users_dict[u.prelock_username.lower()] = u

	def replacer(m):
		u = users_dict.get(m.group(1).lower())
		if not u or (v and u.id in v.all_twoway_blocks):
			return m.group(0)
		return f'<a href="/id/{u.id}"><img loading="lazy" src="/pp/{u.id}">@{u.username}</a>'

	sanitized = mention_regex.sub(replacer, sanitized)

	if FEATURES['PING_GROUPS']:
		def group_replacer(m):
			name = m.group(1).lower()

			if name == 'everyone':
				return f'<a href="/users">!{name}</a>'
			elif g.db.get(Group, name):
				return f'<a href="/!{name}">!{name}</a>'
			else:
				return m.group(0)

		sanitized = group_mention_regex.sub(group_replacer, sanitized)


	soup = BeautifulSoup(sanitized, 'lxml')

	for tag in soup.find_all("img"):
		if tag.get("src") and not tag["src"].startswith('/pp/'):
			if not is_safe_url(tag["src"]):
				a = soup.new_tag("a", href=tag["src"], rel="nofollow noopener", target="_blank")
				a.string = tag["src"]
				tag.replace_with(a)
				continue

			tag["loading"] = "lazy"
			tag["data-src"] = tag["src"]
			tag["src"] = f"{SITE_FULL_IMAGES}/i/l.webp"
			tag['alt'] = tag["data-src"]
			tag['class'] = "img"

			if tag.parent.name != 'a':
				a = soup.new_tag("a", href=tag["data-src"])
				if not is_site_url(a["href"]):
					a["rel"] = "nofollow noopener"
					a["target"] = "_blank"
				tag = tag.replace_with(a)
				a.append(tag)

			tag["data-src"] = tag["data-src"].replace('/giphy.webp', '/200w.webp')

	sanitized = str(soup).replace('<html><body>','').replace('</body></html>','')

	sanitized = spoiler_regex.sub(r'<spoiler>\1</spoiler>', sanitized)

	emojis_used = set()

	emojis = list(emoji_regex.finditer(sanitized))
	if len(emojis) > 20: golden = False

	captured = []
	for i in emojis:
		if i.group(0) in captured: continue
		captured.append(i.group(0))

		old = i.group(0)
		if 'marseylong1' in old or 'marseylong2' in old or 'marseylongcockandballs' in old or 'marseyllama1' in old or 'marseyllama2' in old:
			new = old.lower().replace(">", " class='mb-0'>")
		else: new = old.lower()

		new = render_emoji(new, emoji_regex2, golden, emojis_used, True)

		sanitized = sanitized.replace(old, new)

	emojis = list(emoji_regex2.finditer(sanitized))
	if len(emojis) > 20: golden = False

	sanitized = render_emoji(sanitized, emoji_regex2, golden, emojis_used)

	sanitized = sanitized.replace('&amp;','&')

	captured = []
	for i in youtube_regex.finditer(sanitized):
		if i.group(0) in captured: continue
		captured.append(i.group(0))

		html = handle_youtube_links(i.group(2))
		if html:
			sanitized = sanitized.replace(i.group(0), i.group(1) + html)

	sanitized = video_sub_regex.sub(r'\1<p class="resizable"><video controls preload="none" src="\2"></video></p>', sanitized)
	sanitized = audio_sub_regex.sub(r'\1<audio controls preload="none" src="\2"></audio>', sanitized)

	if count_emojis:
		for emoji in g.db.query(Emoji).filter(Emoji.submitter_id==None, Emoji.name.in_(emojis_used)).all():
			emoji.count += 1
			g.db.add(emoji)

	sanitized = sanitized.replace('<p></p>', '')

	if g.v and g.v.agendaposter:
		allowed_css_properties = allowed_styles
	else:
		allowed_css_properties = allowed_styles + ["filter"]

	css_sanitizer = CSSSanitizer(allowed_css_properties=allowed_css_properties)
	sanitized = bleach.Cleaner(tags=allowed_tags,
								attributes=allowed_attributes,
								protocols=['http', 'https'],
								css_sanitizer=css_sanitizer,
								filters=[partial(LinkifyFilter, skip_tags=["pre"],
									parse_email=False, url_re=url_re)]
								).clean(sanitized)

	#doing this here cuz of the linkifyfilter right above it (therefore unifying all link processing logic)
	soup = BeautifulSoup(sanitized, 'lxml')

	links = soup.find_all("a")

	banned_domains = [x.domain for x in g.db.query(BannedDomain.domain).all()]

	for link in links:
		#remove empty links
		if not link.contents or not str(link.contents[0]).strip():
			link.extract()
			continue

		href = link.get("href")
		if not href: continue
		domain = tldextract.extract(href).registered_domain

		def unlinkfy():
			link.string = href
			del link["href"]

		#\ in href right after / makes most browsers ditch site hostname and allows for a host injection bypassing the check, see <a href="/\google.com">cool</a>
		if "\\" in href:
			unlinkfy()
			continue

		#don't allow something like this https://rdrama.net/post/78376/reminder-of-the-fact-that-our/2150032#context
		if domain and not allowed_domain_regex.fullmatch(domain):
			unlinkfy()
			continue

		#check for banned domain
		combined = (domain + urlparse(href).path).lower()
		if any((combined.startswith(x) for x in banned_domains)):
			unlinkfy()
			continue

		#don't allow something like this [https://rԁrama.net/leaderboard](https://iplogger.org/1fRKk7)
		if not snappy and tldextract.extract(str(link.string)).registered_domain:
			link.string = href

		#insert target="_blank" and ref="nofollower noopener" for external link
		if not href.startswith('/') and not href.startswith(f'{SITE_FULL}/'):
			link["target"] = "_blank"
			link["rel"] = "nofollow noopener"


	sanitized = str(soup).replace('<html><body>','').replace('</body></html>','')

	def error(error):
		if chat:
			return error, 403
		else:
			abort(403, error)


	if discord_username_regex.match(sanitized):
		return error("Stop grooming!")

	if '<pre>' not in sanitized and blackjack != "rules":
		sanitized = sanitized.replace('\n','')

	if showmore:
		# Insert a show more button if the text is too long or has too many paragraphs
		CHARLIMIT = 3000
		pos = 0
		for _ in range(20):
			pos = sanitized.find('</p>', pos + 4)
			if pos < 0:
				break
		if (pos < 0 and len(sanitized) > CHARLIMIT) or pos > CHARLIMIT:
			pos = CHARLIMIT - 500
		if pos >= 0:
			sanitized = (sanitized[:pos] + showmore_regex.sub(r'\1<p><button class="showmore">SHOW MORE</button></p><d class="d-none">\2</d>', sanitized[pos:], count=1))

	return sanitized.strip()

def allowed_attributes_emojis(tag, name, value):
	if tag == 'img':
		if name == 'src' and value.startswith('/') and '\\' not in value: return True
		if name == 'loading' and value == 'lazy': return True
		if name == 'data-bs-toggle' and value == 'tooltip': return True
		if name in {'g','glow'} and not value: return True
		if name in {'alt','title'}: return True

	if tag == 'span':
		if name == 'data-bs-toggle' and value == 'tooltip': return True
		if name == 'title': return True
		if name == 'alt': return True
	return False


@with_sigalrm_timeout(1)
def filter_emojis_only(title, golden=True, count_emojis=False, graceful=False, strip=True):

	title = title.replace('‎','').replace('','').replace("\ufeff", "").replace("𒐪","").replace("\n", "").replace("\r", "").replace("\t", "").replace('<','&lt;').replace('>','&gt;').replace("﷽","")

	emojis_used = set()

	title = render_emoji(title, emoji_regex3, golden, emojis_used)

	if count_emojis:
		for emoji in g.db.query(Emoji).filter(Emoji.submitter_id==None, Emoji.name.in_(emojis_used)).all():
			emoji.count += 1
			g.db.add(emoji)

	title = strikethrough_regex.sub(r'\1<del>\2</del>', title)

	title = bleach.clean(title, tags=['img','del','span'], attributes=allowed_attributes_emojis, protocols=['http','https']).replace('\n','')

	if strip:
		title = title.strip()

	if len(title) > POST_TITLE_HTML_LENGTH_LIMIT and not graceful: abort(400)
	else: return title

def normalize_url(url):
	url = reddit_domain_regex.sub(r'\1https://old.reddit.com/\3/', url)

	url = url.replace("https://youtu.be/", "https://youtube.com/watch?v=") \
			 .replace("https://music.youtube.com/watch?v=", "https://youtube.com/watch?v=") \
			 .replace("https://www.youtube.com", "https://youtube.com") \
			 .replace("https://m.youtube.com", "https://youtube.com") \
			 .replace("https://youtube.com/shorts/", "https://youtube.com/watch?v=") \
			 .replace("https://youtube.com/v/", "https://youtube.com/watch?v=") \
			 .replace("https://mobile.twitter.com", "https://twitter.com") \
			 .replace("https://m.facebook.com", "https://facebook.com") \
			 .replace("https://m.wikipedia.org", "https://wikipedia.org") \
			 .replace("https://www.twitter.com", "https://twitter.com") \
			 .replace("https://www.instagram.com", "https://instagram.com") \
			 .replace("https://www.tiktok.com", "https://tiktok.com") \
			 .replace("https://www.streamable.com", "https://streamable.com") \
			 .replace("https://streamable.com/", "https://streamable.com/e/") \
			 .replace("https://streamable.com/e/e/", "https://streamable.com/e/") \
			 .replace("https://search.marsey.cat/#", "https://camas.unddit.com/#") \
			 .replace("https://imgur.com/", "https://i.imgur.com/") \
			 .replace("https://nitter.net/", "https://twitter.com/") \
			 .replace("https://nitter.42l.fr/", "https://twitter.com/") \
			 .replace("https://nitter.lacontrevoie.fr/", "https://twitter.com/") \
			 .replace("/giphy.gif", "/giphy.webp") \

	url = imgur_regex.sub(r'\1_d.webp?maxwidth=9999&fidelity=grand', url)
	url = giphy_regex.sub(r'\1.webp', url)
	url = unquote(url)

	return url

def validate_css(css):
	if '@import' in css:
		return False, "CSS @import statements are not allowed!"

	if '/*' in css:
		return False, "CSS comments are not allowed!"

	for i in css_url_regex.finditer(css):
		url = i.group(1)
		if not is_safe_url(url):
			domain = tldextract.extract(url).registered_domain
			return False, f"The domain '{domain}' is not allowed, please use one of these domains\n\n{approved_embed_hosts}."

	return True, ""


def torture_ap(string, username):
	if not string: return string
	for k, l in AJ_REPLACEMENTS.items():
		string = string.replace(k, l)
	string = torture_regex.sub(rf'\1@{username}\3', string)
	string = torture_regex2.sub(rf'\1@{username} is\3', string)
	string = torture_regex3.sub(rf"\1@{username}'s\3", string)
	return string

def complies_with_chud(obj):
	#check for cases where u should leave
	if not obj.author.agendaposter: return True
	if obj.author.marseyawarded: return True
	if isinstance(obj, Submission):
		if obj.id in ADMIGGER_THREADS: return True
		if obj.sub == "chudrama": return True
	elif obj.parent_submission:
		if obj.parent_submission in ADMIGGER_THREADS: return True
		if obj.post.sub == "chudrama": return True

	#perserve old body_html to be used in checking for chud phrase
	old_body_html = obj.body_html

	#torture body_html
	if obj.body_html and '<p>&amp;&amp;' not in obj.body_html and '<p>$$' not in obj.body_html and '<p>##' not in obj.body_html:
		soup = BeautifulSoup(obj.body_html, 'lxml')
		tags = soup.html.body.find_all(lambda tag: tag.name not in {'blockquote','codeblock','pre'} and tag.string, recursive=False)
		for tag in tags:
			tag.string.replace_with(torture_ap(tag.string, obj.author.username))
		obj.body_html = str(soup).replace('<html><body>','').replace('</body></html>','')

	#torture title_html and check for agendaposter_phrase in plain title and leave if it's there
	if isinstance(obj, Submission):
		obj.title_html = torture_ap(obj.title_html, obj.author.username)
		if obj.author.agendaposter_phrase in obj.title.lower():
			return True

	#check for agendaposter_phrase in body_html
	if old_body_html:
		excluded_tags = {'del','sub','sup','marquee','spoiler','lite-youtube','video','audio'}
		soup = BeautifulSoup(old_body_html, 'lxml')
		tags = soup.html.body.find_all(lambda tag: tag.name not in excluded_tags and not tag.attrs, recursive=False)
		for tag in tags:
			for text in tag.find_all(text=True, recursive=False):
				if obj.author.agendaposter_phrase in text.lower():
					return True

	return False
-												fix exceptions in sanitize leaving SIGALRM on

											
										
										
											2022-07-05 22:11:45 +00:00
+								import functools
-												[DO NOT MERGE] import detanglation (#442)

* move Base definition to files.classes.__init__.py

* fix ImportError

* move userpage listing to users.py

* don't import the app from classes

* consts: set default values to avoid crashes
consts: warn if the secret key is the default config value

* card view: sneed (user db schema)

* cloudflare: use DEFAULT_CONFIG_VALUE

* const: set default values

* decouple media.py from __main__

* pass database to avoid imports

* import cleanup and import request not in const, but in the requests mega import

* move asset_submissions site check to __init__

* asset submissions feature flag

* flag

* g.is_tor

* don't import request where it's not needed

* i think this is fine

* mail: move to own routes and helper

* wrappers

* required wrappers move

* unfuck wrappers a bit

* move snappy quotes and marseys to stateful consts

* marsify

* :pepodrool:

* fix missing import

* import cache

* ...and settings.py

* and static.py

* static needs cache

* route

* lmao all of the jinja shit was in feeds.py amazing

* classes should only import what they need from flask

* import Response

* hdjbjdhbhjf

* ...

* dfdfdfdf

* make get a non-required import

* isort imports (mostly)

* but actually

* configs

* reload config on import

* fgfgfgfg

* config

* config

* initialize snappy and test

* cookie of doom debug

* edfjnkf

* xikscdfd

* debug config

* set session cookie domain, i think this fixes the can't login bug

* sdfbgnhvfdsghbnjfbdvvfghnn

* hrsfxgf

* dump the entire config on a request

* kyskyskyskyskyskyskyskyskys

* duifhdskfjdfd

* dfdfdfdfdfdfdfdfdfdfdfdf

* dfdfdfdf

* imoprt all of the consts beacuse fuck it

* 😭

* dfdfdfdfdfdfsdasdf

* print the entire session

* rffdfdfjkfksj

* fgbhffh

* not the secret keys

* minor bug fixes

* be helpful in the warning

* gfgfgfg

* move warning lower

* isort main imports (i hope this doesn't fuck something up)

* test

* session cookie domain redux

* dfdfdfd

* try only importing Flask

* formkeys fix

* y

* :pepodrool:

* route helper

* remove before flight

* dfdfdfdfdf

* isort classes

* isort helpers

* move check_for_alts to routehelpers and also sort imports and get rid of unused ones

* that previous commit but actkally

* readd the cache in a dozen places they were implicitly imported

* use g.is_tor instead of request.headers. bla bla bla

* upgrade streamers to their own route file

* get rid of unused imports in __main__

* fgfgf

* don't pull in the entire ORM where we don't need it

* features

* explicit imports for the get helper

* explicit imports for the get helper redux

* testing allroutes

* remove unused import

* decouple flask from classes

* syntax fix also remember these have side fx for some reason (why?)

* move side effects out of the class

* posts

* testing on devrama

* settings

* reloading

* settingssdsdsds

* streamer features

* site settings

* testing settings on devrama

* import

* fix modlog

* remove debug stuff

* revert commit 67275b21ab6e2f2520819e84d10bfc1c746a15b6

* archiveorg to _archiveorg

* skhudkfkjfd

* fix cron for PCM

* fix bugs that snekky wants me to

* Fix call to realbody passing db, standardize kwarg

* test

* import check_for_alts from the right place

* cloudflare

* testing on devrama

* fix cron i think

* shadow properly

* tasks

* Remove print which will surely be annoying in prod.

* v and create new session

* use files.classes

* make errors import little and fix rare 500 in /allow_nsfw

* Revert "use files.classes"

This reverts commit 98c10b876cf86ce058b7fb955cf1ec0bfb9996c6.

* pass v to media functions rather than using g

* fix

* dfdfdfdfd

* cleanup, py type checking is dumb so don't use it where it causes issues

* Fix some merge bugs, add DEFAULT_RATELIMIT to main.

* Fix imports on sqlalchemy expressions.

* `from random import random` is an error.

* Fix replies db param.

* errors: fix missing import

* fix rare 500: only send to GIFT_NOTIF_ID if it exists, and send them the right text

* Fix signup formkey.

* fix 2 500s

* propagate db to submissions

* fix replies

* dfdfdfdf

* Fix verifiedcolor.

* is_manual

* can't use getters outside of an app context

* don't attempt to do gumroad on sites where it's not enabled

* don't attempt to do gumraod on sites's where it's unnecessary

* Revert "don't attempt to do gumroad on sites where it's not enabled"

This reverts commit 6f8a6331878655492dfaf1907b27f8be513c14d3.

* fix 500

* validate media type

Co-authored-by: TLSM <duolsm@outlook.com>
											
										
										
											2022-11-15 09:19:08 +00:00
+								import random
 								import re
 								import signal
 								from functools import partial
-												more chud images

											
										
										
											2023-01-01 07:55:22 +00:00
+								from os import path, listdir
-												add functionality to disable poll formatting (#35)

Co-authored-by: justcool393 <justcool393@gmail.com>
Reviewed-on: https://fsdfsd.net/rDrama/rDrama/pulls/35
Co-authored-by: justcool393 <justcool393@noreply.fsdfsd.net>
Co-committed-by: justcool393 <justcool393@noreply.fsdfsd.net>

											
										
										
											2022-12-04 21:46:27 +00:00
+								from typing import Any
-												fix this https://rdrama.net/post/167983/what-happened-to-jon-stewart-gets/4096939#context

											
										
										
											2023-05-05 00:23:54 +00:00
+								from urllib.parse import parse_qs, urlparse, unquote
-												[DO NOT MERGE] import detanglation (#442)

* move Base definition to files.classes.__init__.py

* fix ImportError

* move userpage listing to users.py

* don't import the app from classes

* consts: set default values to avoid crashes
consts: warn if the secret key is the default config value

* card view: sneed (user db schema)

* cloudflare: use DEFAULT_CONFIG_VALUE

* const: set default values

* decouple media.py from __main__

* pass database to avoid imports

* import cleanup and import request not in const, but in the requests mega import

* move asset_submissions site check to __init__

* asset submissions feature flag

* flag

* g.is_tor

* don't import request where it's not needed

* i think this is fine

* mail: move to own routes and helper

* wrappers

* required wrappers move

* unfuck wrappers a bit

* move snappy quotes and marseys to stateful consts

* marsify

* :pepodrool:

* fix missing import

* import cache

* ...and settings.py

* and static.py

* static needs cache

* route

* lmao all of the jinja shit was in feeds.py amazing

* classes should only import what they need from flask

* import Response

* hdjbjdhbhjf

* ...

* dfdfdfdf

* make get a non-required import

* isort imports (mostly)

* but actually

* configs

* reload config on import

* fgfgfgfg

* config

* config

* initialize snappy and test

* cookie of doom debug

* edfjnkf

* xikscdfd

* debug config

* set session cookie domain, i think this fixes the can't login bug

* sdfbgnhvfdsghbnjfbdvvfghnn

* hrsfxgf

* dump the entire config on a request

* kyskyskyskyskyskyskyskyskys

* duifhdskfjdfd

* dfdfdfdfdfdfdfdfdfdfdfdf

* dfdfdfdf

* imoprt all of the consts beacuse fuck it

* 😭

* dfdfdfdfdfdfsdasdf

* print the entire session

* rffdfdfjkfksj

* fgbhffh

* not the secret keys

* minor bug fixes

* be helpful in the warning

* gfgfgfg

* move warning lower

* isort main imports (i hope this doesn't fuck something up)

* test

* session cookie domain redux

* dfdfdfd

* try only importing Flask

* formkeys fix

* y

* :pepodrool:

* route helper

* remove before flight

* dfdfdfdfdf

* isort classes

* isort helpers

* move check_for_alts to routehelpers and also sort imports and get rid of unused ones

* that previous commit but actkally

* readd the cache in a dozen places they were implicitly imported

* use g.is_tor instead of request.headers. bla bla bla

* upgrade streamers to their own route file

* get rid of unused imports in __main__

* fgfgf

* don't pull in the entire ORM where we don't need it

* features

* explicit imports for the get helper

* explicit imports for the get helper redux

* testing allroutes

* remove unused import

* decouple flask from classes

* syntax fix also remember these have side fx for some reason (why?)

* move side effects out of the class

* posts

* testing on devrama

* settings

* reloading

* settingssdsdsds

* streamer features

* site settings

* testing settings on devrama

* import

* fix modlog

* remove debug stuff

* revert commit 67275b21ab6e2f2520819e84d10bfc1c746a15b6

* archiveorg to _archiveorg

* skhudkfkjfd

* fix cron for PCM

* fix bugs that snekky wants me to

* Fix call to realbody passing db, standardize kwarg

* test

* import check_for_alts from the right place

* cloudflare

* testing on devrama

* fix cron i think

* shadow properly

* tasks

* Remove print which will surely be annoying in prod.

* v and create new session

* use files.classes

* make errors import little and fix rare 500 in /allow_nsfw

* Revert "use files.classes"

This reverts commit 98c10b876cf86ce058b7fb955cf1ec0bfb9996c6.

* pass v to media functions rather than using g

* fix

* dfdfdfdfd

* cleanup, py type checking is dumb so don't use it where it causes issues

* Fix some merge bugs, add DEFAULT_RATELIMIT to main.

* Fix imports on sqlalchemy expressions.

* `from random import random` is an error.

* Fix replies db param.

* errors: fix missing import

* fix rare 500: only send to GIFT_NOTIF_ID if it exists, and send them the right text

* Fix signup formkey.

* fix 2 500s

* propagate db to submissions

* fix replies

* dfdfdfdf

* Fix verifiedcolor.

* is_manual

* can't use getters outside of an app context

* don't attempt to do gumroad on sites where it's not enabled

* don't attempt to do gumraod on sites's where it's unnecessary

* Revert "don't attempt to do gumroad on sites where it's not enabled"

This reverts commit 6f8a6331878655492dfaf1907b27f8be513c14d3.

* fix 500

* validate media type

Co-authored-by: TLSM <duolsm@outlook.com>
											
										
										
											2022-11-15 09:19:08 +00:00
-												mn

											
										
										
											2022-05-04 23:09:46 +00:00
+								import bleach
-												Upgrade bleach to 5.0.0.

											
										
										
											2022-05-25 00:27:41 +00:00
+								from bleach.css_sanitizer import CSSSanitizer
-												fix camas.unddit.com

											
										
										
											2022-07-15 13:27:45 +00:00
+								from bleach.linkifier import LinkifyFilter
-												[DO NOT MERGE] import detanglation (#442)

* move Base definition to files.classes.__init__.py

* fix ImportError

* move userpage listing to users.py

* don't import the app from classes

* consts: set default values to avoid crashes
consts: warn if the secret key is the default config value

* card view: sneed (user db schema)

* cloudflare: use DEFAULT_CONFIG_VALUE

* const: set default values

* decouple media.py from __main__

* pass database to avoid imports

* import cleanup and import request not in const, but in the requests mega import

* move asset_submissions site check to __init__

* asset submissions feature flag

* flag

* g.is_tor

* don't import request where it's not needed

* i think this is fine

* mail: move to own routes and helper

* wrappers

* required wrappers move

* unfuck wrappers a bit

* move snappy quotes and marseys to stateful consts

* marsify

* :pepodrool:

* fix missing import

* import cache

* ...and settings.py

* and static.py

* static needs cache

* route

* lmao all of the jinja shit was in feeds.py amazing

* classes should only import what they need from flask

* import Response

* hdjbjdhbhjf

* ...

* dfdfdfdf

* make get a non-required import

* isort imports (mostly)

* but actually

* configs

* reload config on import

* fgfgfgfg

* config

* config

* initialize snappy and test

* cookie of doom debug

* edfjnkf

* xikscdfd

* debug config

* set session cookie domain, i think this fixes the can't login bug

* sdfbgnhvfdsghbnjfbdvvfghnn

* hrsfxgf

* dump the entire config on a request

* kyskyskyskyskyskyskyskyskys

* duifhdskfjdfd

* dfdfdfdfdfdfdfdfdfdfdfdf

* dfdfdfdf

* imoprt all of the consts beacuse fuck it

* 😭

* dfdfdfdfdfdfsdasdf

* print the entire session

* rffdfdfjkfksj

* fgbhffh

* not the secret keys

* minor bug fixes

* be helpful in the warning

* gfgfgfg

* move warning lower

* isort main imports (i hope this doesn't fuck something up)

* test

* session cookie domain redux

* dfdfdfd

* try only importing Flask

* formkeys fix

* y

* :pepodrool:

* route helper

* remove before flight

* dfdfdfdfdf

* isort classes

* isort helpers

* move check_for_alts to routehelpers and also sort imports and get rid of unused ones

* that previous commit but actkally

* readd the cache in a dozen places they were implicitly imported

* use g.is_tor instead of request.headers. bla bla bla

* upgrade streamers to their own route file

* get rid of unused imports in __main__

* fgfgf

* don't pull in the entire ORM where we don't need it

* features

* explicit imports for the get helper

* explicit imports for the get helper redux

* testing allroutes

* remove unused import

* decouple flask from classes

* syntax fix also remember these have side fx for some reason (why?)

* move side effects out of the class

* posts

* testing on devrama

* settings

* reloading

* settingssdsdsds

* streamer features

* site settings

* testing settings on devrama

* import

* fix modlog

* remove debug stuff

* revert commit 67275b21ab6e2f2520819e84d10bfc1c746a15b6

* archiveorg to _archiveorg

* skhudkfkjfd

* fix cron for PCM

* fix bugs that snekky wants me to

* Fix call to realbody passing db, standardize kwarg

* test

* import check_for_alts from the right place

* cloudflare

* testing on devrama

* fix cron i think

* shadow properly

* tasks

* Remove print which will surely be annoying in prod.

* v and create new session

* use files.classes

* make errors import little and fix rare 500 in /allow_nsfw

* Revert "use files.classes"

This reverts commit 98c10b876cf86ce058b7fb955cf1ec0bfb9996c6.

* pass v to media functions rather than using g

* fix

* dfdfdfdfd

* cleanup, py type checking is dumb so don't use it where it causes issues

* Fix some merge bugs, add DEFAULT_RATELIMIT to main.

* Fix imports on sqlalchemy expressions.

* `from random import random` is an error.

* Fix replies db param.

* errors: fix missing import

* fix rare 500: only send to GIFT_NOTIF_ID if it exists, and send them the right text

* Fix signup formkey.

* fix 2 500s

* propagate db to submissions

* fix replies

* dfdfdfdf

* Fix verifiedcolor.

* is_manual

* can't use getters outside of an app context

* don't attempt to do gumroad on sites where it's not enabled

* don't attempt to do gumraod on sites's where it's unnecessary

* Revert "don't attempt to do gumroad on sites where it's not enabled"

This reverts commit 6f8a6331878655492dfaf1907b27f8be513c14d3.

* fix 500

* validate media type

Co-authored-by: TLSM <duolsm@outlook.com>
											
										
										
											2022-11-15 09:19:08 +00:00
+								from bs4 import BeautifulSoup
-												mn

											
										
										
											2022-05-04 23:09:46 +00:00
+								from mistletoe import markdown
-												link to ping groups

											
										
										
											2023-02-25 22:06:49 +00:00
-												[DO NOT MERGE] import detanglation (#442)

* move Base definition to files.classes.__init__.py

* fix ImportError

* move userpage listing to users.py

* don't import the app from classes

* consts: set default values to avoid crashes
consts: warn if the secret key is the default config value

* card view: sneed (user db schema)

* cloudflare: use DEFAULT_CONFIG_VALUE

* const: set default values

* decouple media.py from __main__

* pass database to avoid imports

* import cleanup and import request not in const, but in the requests mega import

* move asset_submissions site check to __init__

* asset submissions feature flag

* flag

* g.is_tor

* don't import request where it's not needed

* i think this is fine

* mail: move to own routes and helper

* wrappers

* required wrappers move

* unfuck wrappers a bit

* move snappy quotes and marseys to stateful consts

* marsify

* :pepodrool:

* fix missing import

* import cache

* ...and settings.py

* and static.py

* static needs cache

* route

* lmao all of the jinja shit was in feeds.py amazing

* classes should only import what they need from flask

* import Response

* hdjbjdhbhjf

* ...

* dfdfdfdf

* make get a non-required import

* isort imports (mostly)

* but actually

* configs

* reload config on import

* fgfgfgfg

* config

* config

* initialize snappy and test

* cookie of doom debug

* edfjnkf

* xikscdfd

* debug config

* set session cookie domain, i think this fixes the can't login bug

* sdfbgnhvfdsghbnjfbdvvfghnn

* hrsfxgf

* dump the entire config on a request

* kyskyskyskyskyskyskyskyskys

* duifhdskfjdfd

* dfdfdfdfdfdfdfdfdfdfdfdf

* dfdfdfdf

* imoprt all of the consts beacuse fuck it

* 😭

* dfdfdfdfdfdfsdasdf

* print the entire session

* rffdfdfjkfksj

* fgbhffh

* not the secret keys

* minor bug fixes

* be helpful in the warning

* gfgfgfg

* move warning lower

* isort main imports (i hope this doesn't fuck something up)

* test

* session cookie domain redux

* dfdfdfd

* try only importing Flask

* formkeys fix

* y

* :pepodrool:

* route helper

* remove before flight

* dfdfdfdfdf

* isort classes

* isort helpers

* move check_for_alts to routehelpers and also sort imports and get rid of unused ones

* that previous commit but actkally

* readd the cache in a dozen places they were implicitly imported

* use g.is_tor instead of request.headers. bla bla bla

* upgrade streamers to their own route file

* get rid of unused imports in __main__

* fgfgf

* don't pull in the entire ORM where we don't need it

* features

* explicit imports for the get helper

* explicit imports for the get helper redux

* testing allroutes

* remove unused import

* decouple flask from classes

* syntax fix also remember these have side fx for some reason (why?)

* move side effects out of the class

* posts

* testing on devrama

* settings

* reloading

* settingssdsdsds

* streamer features

* site settings

* testing settings on devrama

* import

* fix modlog

* remove debug stuff

* revert commit 67275b21ab6e2f2520819e84d10bfc1c746a15b6

* archiveorg to _archiveorg

* skhudkfkjfd

* fix cron for PCM

* fix bugs that snekky wants me to

* Fix call to realbody passing db, standardize kwarg

* test

* import check_for_alts from the right place

* cloudflare

* testing on devrama

* fix cron i think

* shadow properly

* tasks

* Remove print which will surely be annoying in prod.

* v and create new session

* use files.classes

* make errors import little and fix rare 500 in /allow_nsfw

* Revert "use files.classes"

This reverts commit 98c10b876cf86ce058b7fb955cf1ec0bfb9996c6.

* pass v to media functions rather than using g

* fix

* dfdfdfdfd

* cleanup, py type checking is dumb so don't use it where it causes issues

* Fix some merge bugs, add DEFAULT_RATELIMIT to main.

* Fix imports on sqlalchemy expressions.

* `from random import random` is an error.

* Fix replies db param.

* errors: fix missing import

* fix rare 500: only send to GIFT_NOTIF_ID if it exists, and send them the right text

* Fix signup formkey.

* fix 2 500s

* propagate db to submissions

* fix replies

* dfdfdfdf

* Fix verifiedcolor.

* is_manual

* can't use getters outside of an app context

* don't attempt to do gumroad on sites where it's not enabled

* don't attempt to do gumraod on sites's where it's unnecessary

* Revert "don't attempt to do gumroad on sites where it's not enabled"

This reverts commit 6f8a6331878655492dfaf1907b27f8be513c14d3.

* fix 500

* validate media type

Co-authored-by: TLSM <duolsm@outlook.com>
											
										
										
											2022-11-15 09:19:08 +00:00
+								from files.classes.domains import BannedDomain
-												expand blackjack

											
										
										
											2023-02-07 03:31:49 +00:00
+								from files.classes.mod_logs import ModAction
 								from files.classes.notifications import Notification
-												link to ping groups

											
										
										
											2023-02-25 22:06:49 +00:00
+								from files.classes.group import Group
-												[DO NOT MERGE] import detanglation (#442)

* move Base definition to files.classes.__init__.py

* fix ImportError

* move userpage listing to users.py

* don't import the app from classes

* consts: set default values to avoid crashes
consts: warn if the secret key is the default config value

* card view: sneed (user db schema)

* cloudflare: use DEFAULT_CONFIG_VALUE

* const: set default values

* decouple media.py from __main__

* pass database to avoid imports

* import cleanup and import request not in const, but in the requests mega import

* move asset_submissions site check to __init__

* asset submissions feature flag

* flag

* g.is_tor

* don't import request where it's not needed

* i think this is fine

* mail: move to own routes and helper

* wrappers

* required wrappers move

* unfuck wrappers a bit

* move snappy quotes and marseys to stateful consts

* marsify

* :pepodrool:

* fix missing import

* import cache

* ...and settings.py

* and static.py

* static needs cache

* route

* lmao all of the jinja shit was in feeds.py amazing

* classes should only import what they need from flask

* import Response

* hdjbjdhbhjf

* ...

* dfdfdfdf

* make get a non-required import

* isort imports (mostly)

* but actually

* configs

* reload config on import

* fgfgfgfg

* config

* config

* initialize snappy and test

* cookie of doom debug

* edfjnkf

* xikscdfd

* debug config

* set session cookie domain, i think this fixes the can't login bug

* sdfbgnhvfdsghbnjfbdvvfghnn

* hrsfxgf

* dump the entire config on a request

* kyskyskyskyskyskyskyskyskys

* duifhdskfjdfd

* dfdfdfdfdfdfdfdfdfdfdfdf

* dfdfdfdf

* imoprt all of the consts beacuse fuck it

* 😭

* dfdfdfdfdfdfsdasdf

* print the entire session

* rffdfdfjkfksj

* fgbhffh

* not the secret keys

* minor bug fixes

* be helpful in the warning

* gfgfgfg

* move warning lower

* isort main imports (i hope this doesn't fuck something up)

* test

* session cookie domain redux

* dfdfdfd

* try only importing Flask

* formkeys fix

* y

* :pepodrool:

* route helper

* remove before flight

* dfdfdfdfdf

* isort classes

* isort helpers

* move check_for_alts to routehelpers and also sort imports and get rid of unused ones

* that previous commit but actkally

* readd the cache in a dozen places they were implicitly imported

* use g.is_tor instead of request.headers. bla bla bla

* upgrade streamers to their own route file

* get rid of unused imports in __main__

* fgfgf

* don't pull in the entire ORM where we don't need it

* features

* explicit imports for the get helper

* explicit imports for the get helper redux

* testing allroutes

* remove unused import

* decouple flask from classes

* syntax fix also remember these have side fx for some reason (why?)

* move side effects out of the class

* posts

* testing on devrama

* settings

* reloading

* settingssdsdsds

* streamer features

* site settings

* testing settings on devrama

* import

* fix modlog

* remove debug stuff

* revert commit 67275b21ab6e2f2520819e84d10bfc1c746a15b6

* archiveorg to _archiveorg

* skhudkfkjfd

* fix cron for PCM

* fix bugs that snekky wants me to

* Fix call to realbody passing db, standardize kwarg

* test

* import check_for_alts from the right place

* cloudflare

* testing on devrama

* fix cron i think

* shadow properly

* tasks

* Remove print which will surely be annoying in prod.

* v and create new session

* use files.classes

* make errors import little and fix rare 500 in /allow_nsfw

* Revert "use files.classes"

This reverts commit 98c10b876cf86ce058b7fb955cf1ec0bfb9996c6.

* pass v to media functions rather than using g

* fix

* dfdfdfdfd

* cleanup, py type checking is dumb so don't use it where it causes issues

* Fix some merge bugs, add DEFAULT_RATELIMIT to main.

* Fix imports on sqlalchemy expressions.

* `from random import random` is an error.

* Fix replies db param.

* errors: fix missing import

* fix rare 500: only send to GIFT_NOTIF_ID if it exists, and send them the right text

* Fix signup formkey.

* fix 2 500s

* propagate db to submissions

* fix replies

* dfdfdfdf

* Fix verifiedcolor.

* is_manual

* can't use getters outside of an app context

* don't attempt to do gumroad on sites where it's not enabled

* don't attempt to do gumraod on sites's where it's unnecessary

* Revert "don't attempt to do gumroad on sites where it's not enabled"

This reverts commit 6f8a6331878655492dfaf1907b27f8be513c14d3.

* fix 500

* validate media type

Co-authored-by: TLSM <duolsm@outlook.com>
											
										
										
											2022-11-15 09:19:08 +00:00
-												[DO NOT MERGE] multiple sub banners (#59)

allows multiple sub banners

Snakes note: By request of Carp, especially for WPD.

Co-authored-by: justcool393 <justcool393@gmail.com>
Co-authored-by: Snakes <duolsm@outlook.com>
Reviewed-on: https://fsdfsd.net/rDrama/rDrama/pulls/59
Co-authored-by: justcool393 <justcool393@noreply.fsdfsd.net>
Co-committed-by: justcool393 <justcool393@noreply.fsdfsd.net>

											
										
										
											2022-12-11 23:44:34 +00:00
+								from files.helpers.config.const import *
-												[DO NOT MERGE] import detanglation (#442)

* move Base definition to files.classes.__init__.py

* fix ImportError

* move userpage listing to users.py

* don't import the app from classes

* consts: set default values to avoid crashes
consts: warn if the secret key is the default config value

* card view: sneed (user db schema)

* cloudflare: use DEFAULT_CONFIG_VALUE

* const: set default values

* decouple media.py from __main__

* pass database to avoid imports

* import cleanup and import request not in const, but in the requests mega import

* move asset_submissions site check to __init__

* asset submissions feature flag

* flag

* g.is_tor

* don't import request where it's not needed

* i think this is fine

* mail: move to own routes and helper

* wrappers

* required wrappers move

* unfuck wrappers a bit

* move snappy quotes and marseys to stateful consts

* marsify

* :pepodrool:

* fix missing import

* import cache

* ...and settings.py

* and static.py

* static needs cache

* route

* lmao all of the jinja shit was in feeds.py amazing

* classes should only import what they need from flask

* import Response

* hdjbjdhbhjf

* ...

* dfdfdfdf

* make get a non-required import

* isort imports (mostly)

* but actually

* configs

* reload config on import

* fgfgfgfg

* config

* config

* initialize snappy and test

* cookie of doom debug

* edfjnkf

* xikscdfd

* debug config

* set session cookie domain, i think this fixes the can't login bug

* sdfbgnhvfdsghbnjfbdvvfghnn

* hrsfxgf

* dump the entire config on a request

* kyskyskyskyskyskyskyskyskys

* duifhdskfjdfd

* dfdfdfdfdfdfdfdfdfdfdfdf

* dfdfdfdf

* imoprt all of the consts beacuse fuck it

* 😭

* dfdfdfdfdfdfsdasdf

* print the entire session

* rffdfdfjkfksj

* fgbhffh

* not the secret keys

* minor bug fixes

* be helpful in the warning

* gfgfgfg

* move warning lower

* isort main imports (i hope this doesn't fuck something up)

* test

* session cookie domain redux

* dfdfdfd

* try only importing Flask

* formkeys fix

* y

* :pepodrool:

* route helper

* remove before flight

* dfdfdfdfdf

* isort classes

* isort helpers

* move check_for_alts to routehelpers and also sort imports and get rid of unused ones

* that previous commit but actkally

* readd the cache in a dozen places they were implicitly imported

* use g.is_tor instead of request.headers. bla bla bla

* upgrade streamers to their own route file

* get rid of unused imports in __main__

* fgfgf

* don't pull in the entire ORM where we don't need it

* features

* explicit imports for the get helper

* explicit imports for the get helper redux

* testing allroutes

* remove unused import

* decouple flask from classes

* syntax fix also remember these have side fx for some reason (why?)

* move side effects out of the class

* posts

* testing on devrama

* settings

* reloading

* settingssdsdsds

* streamer features

* site settings

* testing settings on devrama

* import

* fix modlog

* remove debug stuff

* revert commit 67275b21ab6e2f2520819e84d10bfc1c746a15b6

* archiveorg to _archiveorg

* skhudkfkjfd

* fix cron for PCM

* fix bugs that snekky wants me to

* Fix call to realbody passing db, standardize kwarg

* test

* import check_for_alts from the right place

* cloudflare

* testing on devrama

* fix cron i think

* shadow properly

* tasks

* Remove print which will surely be annoying in prod.

* v and create new session

* use files.classes

* make errors import little and fix rare 500 in /allow_nsfw

* Revert "use files.classes"

This reverts commit 98c10b876cf86ce058b7fb955cf1ec0bfb9996c6.

* pass v to media functions rather than using g

* fix

* dfdfdfdfd

* cleanup, py type checking is dumb so don't use it where it causes issues

* Fix some merge bugs, add DEFAULT_RATELIMIT to main.

* Fix imports on sqlalchemy expressions.

* `from random import random` is an error.

* Fix replies db param.

* errors: fix missing import

* fix rare 500: only send to GIFT_NOTIF_ID if it exists, and send them the right text

* Fix signup formkey.

* fix 2 500s

* propagate db to submissions

* fix replies

* dfdfdfdf

* Fix verifiedcolor.

* is_manual

* can't use getters outside of an app context

* don't attempt to do gumroad on sites where it's not enabled

* don't attempt to do gumraod on sites's where it's unnecessary

* Revert "don't attempt to do gumroad on sites where it's not enabled"

This reverts commit 6f8a6331878655492dfaf1907b27f8be513c14d3.

* fix 500

* validate media type

Co-authored-by: TLSM <duolsm@outlook.com>
											
										
										
											2022-11-15 09:19:08 +00:00
+								from files.helpers.const_stateful import *
 								from files.helpers.regex import *
-												fix unimported push_notif

											
										
										
											2023-03-09 22:32:31 +00:00
+								from files.helpers.get import *
-												mn

											
										
										
											2022-05-04 23:09:46 +00:00
-												go back to the old TLD system in sanitize

											
										
										
											2022-11-02 07:08:02 +00:00
+								TLDS = ( # Original gTLDs and ccTLDs
 									'ac','ad','ae','aero','af','ag','ai','al','am','an','ao','aq','ar','arpa','as','asia','at',
 									'au','aw','ax','az','ba','bb','bd','be','bf','bg','bh','bi','biz','bj','bm','bn','bo','br',
 									'bs','bt','bv','bw','by','bz','ca','cafe','cat','cc','cd','cf','cg','ch','ci','ck','cl',
 									'cm','cn','co','com','coop','cr','cu','cv','cx','cy','cz','de','dj','dk','dm','do','dz','ec',
 									'edu','ee','eg','er','es','et','eu','fi','fj','fk','fm','fo','fr','ga','gb','gd','ge','gf',
 									'gg','gh','gi','gl','gm','gn','gov','gp','gq','gr','gs','gt','gu','gw','gy','hk','hm','hn',
 									'hr','ht','hu','id','ie','il','im','in','info','int','io','iq','ir','is','it','je','jm','jo',
 									'jobs','jp','ke','kg','kh','ki','km','kn','kp','kr','kw','ky','kz','la','lb','lc','li','lk',
 									'lr','ls','lt','lu','lv','ly','ma','mc','md','me','mg','mh','mil','mk','ml','mm','mn','mo',
 									'mobi','mp','mq','mr','ms','mt','mu','museum','mv','mw','mx','my','mz','na','name',
 									'nc','ne','net','nf','ng','ni','nl','no','np','nr','nu','nz','om','org','pa','pe','pf','pg',
 									'ph','pk','pl','pm','pn','post','pr','pro','ps','pt','pw','py','qa','re','ro','rs','ru','rw',
 									'sa','sb','sc','sd','se','sg','sh','si','sj','sk','sl','sm','sn','so','social','sr','ss','st',
 									'su','sv','sx','sy','sz','tc','td','tel','tf','tg','th','tj','tk','tl','tm','tn','to','tp',
 									'tr','travel','tt','tv','tw','tz','ua','ug','uk','us','uy','uz','va','vc','ve','vg','vi','vn',
 									'vu','wf','ws','xn','xxx','ye','yt','yu','za','zm','zw',
 									# New gTLDs
 									'app','cleaning','club','dev','farm','florist','fun','gay','lgbt','life','lol',
-												add 'blog' tld (#108)

https://scottaaronson.blog/ is a thing, for example. We must support the thing.

Add .blog as an approved TLD.

Co-authored-by: official-techsupport <official_techsupport@protonmail.com>
Reviewed-on: https://fsdfsd.net/rDrama/rDrama/pulls/108
Co-authored-by: official-techsupport <official-techsupport@noreply.fsdfsd.net>
Co-committed-by: official-techsupport <official-techsupport@noreply.fsdfsd.net>

											
										
										
											2023-02-01 23:20:08 +00:00
+									'moe','mom','monster','new','news','online','pics','press','pub','site','blog',
-												add .goog to supported tlds

											
										
										
											2023-04-24 08:18:55 +00:00
+									'vip','win','world','wtf','xyz','video','host','art','media','wiki','tech',
-												add .markets to supported tlds

											
										
										
											2023-04-24 08:20:24 +00:00
+									'cooking','network','party','goog','markets',
-												go back to the old TLD system in sanitize

											
										
										
											2022-11-02 07:08:02 +00:00
+									)
-												mn

											
										
										
											2022-05-04 23:09:46 +00:00
-												sanitize: Add more neo-gTLDs to TLDs tuple-list.

Per https://rdrama.net/post/70341/-/1976650 added more gTLDs that
are actually desired by site users.

Also, hard wrapped the `TLDS` and `allowed_tags` tuple-lists at a
100char hard ruler for my sanity.

											
										
										
											2022-05-24 19:16:55 +00:00
+								allowed_tags = ('b','blockquote','br','code','del','em','h1','h2','h3','h4','h5','h6','hr','i',
 									'li','ol','p','pre','strong','sub','sup','table','tbody','th','thead','td','tr','ul',
-												do this https://rdrama.net/post/18459/marseycapywalking-megathread-for-bugs-and-suggestions/3909706#context

											
										
										
											2023-04-24 07:31:45 +00:00
+									'marquee','a','span','ruby','rp','rt','spoiler','img','lite-youtube','video','audio','g','u')
-												mn

											
										
										
											2022-05-04 23:09:46 +00:00
-												fix this https://rdrama.net/h/changelog/post/157290/rdramanet-is-proud-to-welcome-the/3852376#context

											
										
										
											2023-03-24 11:31:12 +00:00
+								allowed_styles = ['color', 'background-color', 'font-weight', 'text-align']
-												Upgrade bleach to 5.0.0.

											
										
										
											2022-05-25 00:27:41 +00:00
-												mn

											
										
										
											2022-05-04 23:09:46 +00:00
+								def allowed_attributes(tag, name, value):
 									if name == 'style': return True
 									if tag == 'marquee':
-												use sets instead of lists in some statements

											
										
										
											2022-11-26 04:52:47 +00:00
+										if name in {'direction', 'behavior', 'scrollamount'}: return True
-												mn

											
										
										
											2022-05-04 23:09:46 +00:00
+										if name in {'height', 'width'}:
 											try: value = int(value.replace('px', ''))
 											except: return False
 											if 0 < value <= 250: return True
-												fix exceptions in sanitize leaving SIGALRM on

											
										
										
											2022-07-05 22:11:45 +00:00
-												mn

											
										
										
											2022-05-04 23:09:46 +00:00
+									if tag == 'a':
-												Remove !YOU!.

Security mess and stale joke.

											
										
										
											2022-12-06 01:06:04 +00:00
+										if name == 'href' and '\\' not in value and 'xn--' not in value:
-												allow only ascii characters in links (https://rdrama.net/comment/2150032)

											
										
										
											2022-06-19 17:25:55 +00:00
+											return True
-												remove redundant "noreferrer"

											
										
										
											2022-10-29 21:46:30 +00:00
+										if name == 'rel' and value == 'nofollow noopener': return True
-												Revert "stop adding target="_blank" in the backend and move it to the frontend (to accomodate PWA users) - THANK YOU GEESE I LOVE YOU SO MUCH (#473)"

This reverts commit 88f3cd519d37e9473bb93239de4981efab688ed7.

											
										
										
											2022-11-21 17:37:38 +00:00
+										if name == 'target' and value == '_blank': return True
-												mn

											
										
										
											2022-05-04 23:09:46 +00:00
 									if tag == 'img':
-												use sets instead of lists in some statements

											
										
										
											2022-11-26 04:52:47 +00:00
+										if name in {'src','data-src'}: return is_safe_url(value)
-												mn

											
										
										
											2022-05-04 23:09:46 +00:00
+										if name == 'loading' and value == 'lazy': return True
 										if name == 'data-bs-toggle' and value == 'tooltip': return True
-												use sets instead of lists in some statements

											
										
										
											2022-11-26 04:52:47 +00:00
+										if name in {'g','b','glow'} and not value: return True
 										if name in {'alt','title'}: return True
-												use .img class

											
										
										
											2023-03-12 13:13:28 +00:00
+										if name == 'class' and value == 'img': return True
-												mn

											
										
										
											2022-05-04 23:09:46 +00:00
 									if tag == 'lite-youtube':
 										if name == 'params' and value.startswith('autoplay=1&modestbranding=1'): return True
 										if name == 'videoid': return True
 									if tag == 'video':
 										if name == 'controls' and value == '': return True
 										if name == 'preload' and value == 'none': return True
-												crgd is a king

											
										
										
											2022-05-25 18:29:22 +00:00
+										if name == 'src': return is_safe_url(value)
-												mn

											
										
										
											2022-05-04 23:09:46 +00:00
-												Add <audio> support to sanitize.py.

Parallels the decisions made with <video>.

											
										
										
											2022-05-15 22:47:37 +00:00
+									if tag == 'audio':
-												crgd is a king

											
										
										
											2022-05-25 18:29:22 +00:00
+										if name == 'src': return is_safe_url(value)
-												Add <audio> support to sanitize.py.

Parallels the decisions made with <video>.

											
										
										
											2022-05-15 22:47:37 +00:00
+										if name == 'controls' and value == '': return True
 										if name == 'preload' and value == 'none': return True
-												mn

											
										
										
											2022-05-04 23:09:46 +00:00
+									if tag == 'p':
-												use sets with in statements

											
										
										
											2022-12-23 22:22:41 +00:00
+										if name == 'class' and value in {'mb-0','resizable'}: return True
-												mn

											
										
										
											2022-05-04 23:09:46 +00:00
 									if tag == 'span':
 										if name == 'data-bs-toggle' and value == 'tooltip': return True
 										if name == 'title': return True
 										if name == 'alt': return True
-												whitelist table class for tables in sanitizer

											
										
										
											2022-12-09 21:04:22 +00:00
+									if tag == 'table':
 										if name == 'class' and value == 'table': return True
-												get rid of useless tabs and spaces

											
										
										
											2023-01-01 11:36:20 +00:00
-												resizable videos

											
										
										
											2022-12-10 19:12:14 +00:00
+									return False
-												mn

											
										
										
											2022-05-04 23:09:46 +00:00
-												go back to the old TLD system in sanitize

											
										
										
											2022-11-02 07:08:02 +00:00
+								def build_url_re(tlds, protocols):
-												casino + style shit

											
										
										
											2022-09-04 23:15:37 +00:00
+									"""Builds the url regex used by linkifier
 									If you want a different set of tlds or allowed protocols, pass those in
 									and stomp on the existing ``url_re``::
 										from bleach import linkifier
 										my_url_re = linkifier.build_url_re(my_tlds_list, my_protocols)
 										linker = LinkifyFilter(url_re=my_url_re)
 									"""
 									return re.compile(
 										r"""\(*# Match any opening parentheses.
 										\b(?<![@.])(?:(?:{0}):/{{0,3}}(?:(?:\w+:)?\w+@)?)?# http://
-												go back to the old TLD system in sanitize

											
										
										
											2022-11-02 07:08:02 +00:00
+										([\w-]+\.)+(?:{1})(?:\:[0-9]+)?(?!\.\w)\b# xx.yy.tld(:##)?
-												casino + style shit

											
										
										
											2022-09-04 23:15:37 +00:00
+										(?:[/?][^#\s\{{\}}\|\\\^\[\]`<>"]*)?
 											# /path/zz (excluding "unsafe" chars from RFC 1738,
 											# except for ~, which happens in practice)
 										(?:\#[^#\s\|\\\^\[\]`<>"]*)?
 											# #hash (excluding "unsafe" chars from RFC 1738,
 											# except for ~, which happens in practice)
-												go back to the old TLD system in sanitize

											
										
										
											2022-11-02 07:08:02 +00:00
+										""".format(
 											"|".join(sorted(protocols)), "|".join(sorted(tlds))
 										),
-												tlds should be lowercase always - fixes issue with ppl forgetting space after dot while typing posts or comments

											
										
										
											2023-05-03 18:06:25 +00:00
+										re.VERBOSE | re.UNICODE,
-												casino + style shit

											
										
										
											2022-09-04 23:15:37 +00:00
+									)
-												fix camas.unddit.com

											
										
										
											2022-07-15 13:27:45 +00:00
-												go back to the old TLD system in sanitize

											
										
										
											2022-11-02 07:08:02 +00:00
+								url_re = build_url_re(tlds=TLDS, protocols=['http', 'https'])
-												mn

											
										
										
											2022-05-04 23:09:46 +00:00
-												fix 500 error

											
										
										
											2023-05-03 14:12:12 +00:00
+								def create_comment_duplicated(text_html):
 									new_comment = Comment(author_id=AUTOJANNY_ID,
 															parent_submission=None,
 															body_html=text_html,
 															distinguish_level=6,
 															is_bot=True)
 									g.db.add(new_comment)
 									g.db.flush()
 									new_comment.top_comment_id = new_comment.id
 									return new_comment.id
 								def send_repeatable_notification_duplicated(uid, text):
-												replace "bots" with "BOT_IDs"

											
										
										
											2023-05-12 22:29:34 +00:00
+									if uid in BOT_IDs: return
-												fix 500 error

											
										
										
											2023-05-03 14:12:12 +00:00
 									text_html = sanitize(text)
 									existing_comments = g.db.query(Comment.id).filter_by(author_id=AUTOJANNY_ID, parent_submission=None, body_html=text_html, is_bot=True).order_by(Comment.id).all()
 									for c in existing_comments:
 										existing_notif = g.db.query(Notification.user_id).filter_by(user_id=uid, comment_id=c.id).one_or_none()
 										if not existing_notif:
 											notif = Notification(comment_id=c.id, user_id=uid)
 											g.db.add(notif)
 											return
 									cid = create_comment_duplicated(text_html)
 									notif = Notification(comment_id=cid, user_id=uid)
 									g.db.add(notif)
-												expand blackjack

											
										
										
											2023-02-07 03:31:49 +00:00
+								def execute_blackjack(v, target, body, type):
 									if not blackjack or not body: return False
 									execute = False
 									for x in blackjack.split(','):
 										if all(i in body.lower() for i in x.split()):
 											execute = True
 									if not execute: return False
-												always shadow

											
										
										
											2023-05-03 15:38:45 +00:00
+									v.shadowbanned = AUTOJANNY_ID
 									ma = ModAction(
 										kind="shadowban",
 										user_id=AUTOJANNY_ID,
 										target_user_id=v.id,
 										_note='reason: "Blackjack"'
 									)
 									g.db.add(ma)
 									v.ban_reason = "Blackjack"
 									g.db.add(v)
-												expand blackjack

											
										
										
											2023-02-07 03:31:49 +00:00
-												revert sqlalchemy changes

											
										
										
											2023-03-16 06:27:58 +00:00
+									notified_ids = [x[0] for x in g.db.query(User.id).filter(User.admin_level >= PERMS['BLACKJACK_NOTIFICATIONS'])]
-												expand blackjack

											
										
										
											2023-02-07 03:31:49 +00:00
+									extra_info = type
 									if target:
 										if type == 'submission':
 											extra_info = target.permalink
 										elif type == 'flag':
 											extra_info = f"reports on {target.permalink}"
 										elif type in {'comment', 'message'}:
 											for id in notified_ids:
 												n = Notification(comment_id=target.id, user_id=id)
-												revert sqlalchemy changes

											
										
										
											2023-03-16 06:27:58 +00:00
+												g.db.add(n)
-												fix unimported push_notif

											
										
										
											2023-03-09 22:32:31 +00:00
-												expand blackjack

											
										
										
											2023-02-07 03:31:49 +00:00
+											extra_info = None
 									if extra_info:
 										for id in notified_ids:
-												fix 500 error

											
										
										
											2023-05-03 14:12:12 +00:00
+											send_repeatable_notification_duplicated(id, f"Blackjack by @{v.username}: {extra_info}")
-												expand blackjack

											
										
										
											2023-02-07 03:31:49 +00:00
+									return True
-												mn

											
										
										
											2022-05-04 23:09:46 +00:00
-												count non-marsey emojis too (to be used for emoji picker prioritization)

											
										
										
											2023-03-19 08:33:04 +00:00
+								def render_emoji(html, regexp, golden, emojis_used, b=False):
-												mn

											
										
										
											2022-05-04 23:09:46 +00:00
+									emojis = list(regexp.finditer(html))
 									captured = set()
 									for i in emojis:
 										if i.group(0) in captured: continue
 										captured.add(i.group(0))
 										emoji = i.group(1).lower()
 										attrs = ''
 										if b: attrs += ' b'
-												minor sanitize refactor

											
										
										
											2022-09-16 16:30:34 +00:00
+										if golden and len(emojis) <= 20 and ('marsey' in emoji or emoji in marseys_const2):
-												[DO NOT MERGE] import detanglation (#442)

* move Base definition to files.classes.__init__.py

* fix ImportError

* move userpage listing to users.py

* don't import the app from classes

* consts: set default values to avoid crashes
consts: warn if the secret key is the default config value

* card view: sneed (user db schema)

* cloudflare: use DEFAULT_CONFIG_VALUE

* const: set default values

* decouple media.py from __main__

* pass database to avoid imports

* import cleanup and import request not in const, but in the requests mega import

* move asset_submissions site check to __init__

* asset submissions feature flag

* flag

* g.is_tor

* don't import request where it's not needed

* i think this is fine

* mail: move to own routes and helper

* wrappers

* required wrappers move

* unfuck wrappers a bit

* move snappy quotes and marseys to stateful consts

* marsify

* :pepodrool:

* fix missing import

* import cache

* ...and settings.py

* and static.py

* static needs cache

* route

* lmao all of the jinja shit was in feeds.py amazing

* classes should only import what they need from flask

* import Response

* hdjbjdhbhjf

* ...

* dfdfdfdf

* make get a non-required import

* isort imports (mostly)

* but actually

* configs

* reload config on import

* fgfgfgfg

* config

* config

* initialize snappy and test

* cookie of doom debug

* edfjnkf

* xikscdfd

* debug config

* set session cookie domain, i think this fixes the can't login bug

* sdfbgnhvfdsghbnjfbdvvfghnn

* hrsfxgf

* dump the entire config on a request

* kyskyskyskyskyskyskyskyskys

* duifhdskfjdfd

* dfdfdfdfdfdfdfdfdfdfdfdf

* dfdfdfdf

* imoprt all of the consts beacuse fuck it

* 😭

* dfdfdfdfdfdfsdasdf

* print the entire session

* rffdfdfjkfksj

* fgbhffh

* not the secret keys

* minor bug fixes

* be helpful in the warning

* gfgfgfg

* move warning lower

* isort main imports (i hope this doesn't fuck something up)

* test

* session cookie domain redux

* dfdfdfd

* try only importing Flask

* formkeys fix

* y

* :pepodrool:

* route helper

* remove before flight

* dfdfdfdfdf

* isort classes

* isort helpers

* move check_for_alts to routehelpers and also sort imports and get rid of unused ones

* that previous commit but actkally

* readd the cache in a dozen places they were implicitly imported

* use g.is_tor instead of request.headers. bla bla bla

* upgrade streamers to their own route file

* get rid of unused imports in __main__

* fgfgf

* don't pull in the entire ORM where we don't need it

* features

* explicit imports for the get helper

* explicit imports for the get helper redux

* testing allroutes

* remove unused import

* decouple flask from classes

* syntax fix also remember these have side fx for some reason (why?)

* move side effects out of the class

* posts

* testing on devrama

* settings

* reloading

* settingssdsdsds

* streamer features

* site settings

* testing settings on devrama

* import

* fix modlog

* remove debug stuff

* revert commit 67275b21ab6e2f2520819e84d10bfc1c746a15b6

* archiveorg to _archiveorg

* skhudkfkjfd

* fix cron for PCM

* fix bugs that snekky wants me to

* Fix call to realbody passing db, standardize kwarg

* test

* import check_for_alts from the right place

* cloudflare

* testing on devrama

* fix cron i think

* shadow properly

* tasks

* Remove print which will surely be annoying in prod.

* v and create new session

* use files.classes

* make errors import little and fix rare 500 in /allow_nsfw

* Revert "use files.classes"

This reverts commit 98c10b876cf86ce058b7fb955cf1ec0bfb9996c6.

* pass v to media functions rather than using g

* fix

* dfdfdfdfd

* cleanup, py type checking is dumb so don't use it where it causes issues

* Fix some merge bugs, add DEFAULT_RATELIMIT to main.

* Fix imports on sqlalchemy expressions.

* `from random import random` is an error.

* Fix replies db param.

* errors: fix missing import

* fix rare 500: only send to GIFT_NOTIF_ID if it exists, and send them the right text

* Fix signup formkey.

* fix 2 500s

* propagate db to submissions

* fix replies

* dfdfdfdf

* Fix verifiedcolor.

* is_manual

* can't use getters outside of an app context

* don't attempt to do gumroad on sites where it's not enabled

* don't attempt to do gumraod on sites's where it's unnecessary

* Revert "don't attempt to do gumroad on sites where it's not enabled"

This reverts commit 6f8a6331878655492dfaf1907b27f8be513c14d3.

* fix 500

* validate media type

Co-authored-by: TLSM <duolsm@outlook.com>
											
										
										
											2022-11-15 09:19:08 +00:00
+											if random.random() < 0.0025: attrs += ' g'
 											elif random.random() < 0.00125: attrs += ' glow'
-												mn

											
										
										
											2022-05-04 23:09:46 +00:00
 										old = emoji
 										emoji = emoji.replace('!','').replace('#','')
-												[DO NOT MERGE] import detanglation (#442)

* move Base definition to files.classes.__init__.py

* fix ImportError

* move userpage listing to users.py

* don't import the app from classes

* consts: set default values to avoid crashes
consts: warn if the secret key is the default config value

* card view: sneed (user db schema)

* cloudflare: use DEFAULT_CONFIG_VALUE

* const: set default values

* decouple media.py from __main__

* pass database to avoid imports

* import cleanup and import request not in const, but in the requests mega import

* move asset_submissions site check to __init__

* asset submissions feature flag

* flag

* g.is_tor

* don't import request where it's not needed

* i think this is fine

* mail: move to own routes and helper

* wrappers

* required wrappers move

* unfuck wrappers a bit

* move snappy quotes and marseys to stateful consts

* marsify

* :pepodrool:

* fix missing import

* import cache

* ...and settings.py

* and static.py

* static needs cache

* route

* lmao all of the jinja shit was in feeds.py amazing

* classes should only import what they need from flask

* import Response

* hdjbjdhbhjf

* ...

* dfdfdfdf

* make get a non-required import

* isort imports (mostly)

* but actually

* configs

* reload config on import

* fgfgfgfg

* config

* config

* initialize snappy and test

* cookie of doom debug

* edfjnkf

* xikscdfd

* debug config

* set session cookie domain, i think this fixes the can't login bug

* sdfbgnhvfdsghbnjfbdvvfghnn

* hrsfxgf

* dump the entire config on a request

* kyskyskyskyskyskyskyskyskys

* duifhdskfjdfd

* dfdfdfdfdfdfdfdfdfdfdfdf

* dfdfdfdf

* imoprt all of the consts beacuse fuck it

* 😭

* dfdfdfdfdfdfsdasdf

* print the entire session

* rffdfdfjkfksj

* fgbhffh

* not the secret keys

* minor bug fixes

* be helpful in the warning

* gfgfgfg

* move warning lower

* isort main imports (i hope this doesn't fuck something up)

* test

* session cookie domain redux

* dfdfdfd

* try only importing Flask

* formkeys fix

* y

* :pepodrool:

* route helper

* remove before flight

* dfdfdfdfdf

* isort classes

* isort helpers

* move check_for_alts to routehelpers and also sort imports and get rid of unused ones

* that previous commit but actkally

* readd the cache in a dozen places they were implicitly imported

* use g.is_tor instead of request.headers. bla bla bla

* upgrade streamers to their own route file

* get rid of unused imports in __main__

* fgfgf

* don't pull in the entire ORM where we don't need it

* features

* explicit imports for the get helper

* explicit imports for the get helper redux

* testing allroutes

* remove unused import

* decouple flask from classes

* syntax fix also remember these have side fx for some reason (why?)

* move side effects out of the class

* posts

* testing on devrama

* settings

* reloading

* settingssdsdsds

* streamer features

* site settings

* testing settings on devrama

* import

* fix modlog

* remove debug stuff

* revert commit 67275b21ab6e2f2520819e84d10bfc1c746a15b6

* archiveorg to _archiveorg

* skhudkfkjfd

* fix cron for PCM

* fix bugs that snekky wants me to

* Fix call to realbody passing db, standardize kwarg

* test

* import check_for_alts from the right place

* cloudflare

* testing on devrama

* fix cron i think

* shadow properly

* tasks

* Remove print which will surely be annoying in prod.

* v and create new session

* use files.classes

* make errors import little and fix rare 500 in /allow_nsfw

* Revert "use files.classes"

This reverts commit 98c10b876cf86ce058b7fb955cf1ec0bfb9996c6.

* pass v to media functions rather than using g

* fix

* dfdfdfdfd

* cleanup, py type checking is dumb so don't use it where it causes issues

* Fix some merge bugs, add DEFAULT_RATELIMIT to main.

* Fix imports on sqlalchemy expressions.

* `from random import random` is an error.

* Fix replies db param.

* errors: fix missing import

* fix rare 500: only send to GIFT_NOTIF_ID if it exists, and send them the right text

* Fix signup formkey.

* fix 2 500s

* propagate db to submissions

* fix replies

* dfdfdfdf

* Fix verifiedcolor.

* is_manual

* can't use getters outside of an app context

* don't attempt to do gumroad on sites where it's not enabled

* don't attempt to do gumraod on sites's where it's unnecessary

* Revert "don't attempt to do gumroad on sites where it's not enabled"

This reverts commit 6f8a6331878655492dfaf1907b27f8be513c14d3.

* fix 500

* validate media type

Co-authored-by: TLSM <duolsm@outlook.com>
											
										
										
											2022-11-15 09:19:08 +00:00
+										if emoji == 'marseyrandom': emoji = random.choice(marseys_const2)
-												mn

											
										
										
											2022-05-04 23:09:46 +00:00
 										emoji_partial_pat = '<img loading="lazy" alt=":{0}:" src="{1}"{2}>'
 										emoji_partial = '<img loading="lazy" data-bs-toggle="tooltip" alt=":{0}:" title=":{0}:" src="{1}"{2}>'
 										emoji_html = None
-												make :marseyunpettable: unpettable

											
										
										
											2022-07-08 15:39:54 +00:00
+										if emoji.endswith('pat') and emoji != 'marseyunpettablepat':
-												mn

											
										
										
											2022-05-04 23:09:46 +00:00
+											if path.isfile(f"files/assets/images/emojis/{emoji.replace('pat','')}.webp"):
-												make all images have loading="lazy"

											
										
										
											2023-01-27 17:55:25 +00:00
+												emoji_html = f'<span data-bs-toggle="tooltip" alt=":{old}:" title=":{old}:"><img loading="lazy" src="/i/hand.webp">{emoji_partial_pat.format(old, f"/e/{emoji[:-3]}.webp", attrs)}</span>'
-												mn

											
										
										
											2022-05-04 23:09:46 +00:00
+											elif emoji.startswith('@'):
 												if u := get_user(emoji[1:-3], graceful=True):
-												make all images have loading="lazy"

											
										
										
											2023-01-27 17:55:25 +00:00
+													emoji_html = f'<span data-bs-toggle="tooltip" alt=":{old}:" title=":{old}:"><img loading="lazy" src="/i/hand.webp">{emoji_partial_pat.format(old, f"/pp/{u.id}", attrs)}</span>'
-												mn

											
										
										
											2022-05-04 23:09:46 +00:00
+										elif path.isfile(f'files/assets/images/emojis/{emoji}.webp'):
 											emoji_html = emoji_partial.format(old, f'/e/{emoji}.webp', attrs)
 										if emoji_html:
-												count non-marsey emojis too (to be used for emoji picker prioritization)

											
										
										
											2023-03-19 08:33:04 +00:00
+											emojis_used.add(emoji)
-												Revert "fix this https://stupidpol.site/post/149214/food-wars-ii-marseychingchong-strikes-back/3642965#context"

This reverts commit afd6949998bb06c3702cb568be9fffc021b94faa.

											
										
										
											2023-02-23 23:38:00 +00:00
+											html = re.sub(f'(?<!"){i.group(0)}(?![^<]*<\/(code|pre|a)>)', emoji_html, html)
-												mn

											
										
										
											2022-05-04 23:09:46 +00:00
+									return html
-												fix exceptions in sanitize leaving SIGALRM on

											
										
										
											2022-07-05 22:11:45 +00:00
+								def with_sigalrm_timeout(timeout: int):
 									'Use SIGALRM to raise an exception if the function executes for longer than timeout seconds'
-												mn

											
										
										
											2022-05-04 23:09:46 +00:00
-												fix exceptions in sanitize leaving SIGALRM on

											
										
										
											2022-07-05 22:11:45 +00:00
+									# while trying to test this using time.sleep I discovered that gunicorn does in fact do some
 									# async so if we timeout on that (or on a db op) then the process is crashed without returning
 									# a proper 500 error. Oh well.
 									def sig_handler(signum, frame):
 										print("Timeout!", flush=True)
 										raise Exception("Timeout")
-												mn

											
										
										
											2022-05-04 23:09:46 +00:00
-												fix exceptions in sanitize leaving SIGALRM on

											
										
										
											2022-07-05 22:11:45 +00:00
+									def inner(func):
-												sanitize.with_sigalrm_timeout: functools.wrap fix.

											
										
										
											2022-07-06 09:01:48 +00:00
+										@functools.wraps(func)
-												fix exceptions in sanitize leaving SIGALRM on

											
										
										
											2022-07-05 22:11:45 +00:00
+										def wrapped(*args, **kwargs):
 											signal.signal(signal.SIGALRM, sig_handler)
 											signal.alarm(timeout)
 											try:
 												return func(*args, **kwargs)
 											finally:
 												signal.alarm(0)
 										return wrapped
 									return inner
-												sanitize, fix bug with update_flag, and update copy for low tsfriends

											
										
										
											2022-11-07 00:40:51 +00:00
+								def sanitize_raw_title(sanitized:Optional[str]) -> str:
-												sanitize raw bodies

											
										
										
											2022-10-05 08:16:56 +00:00
+									if not sanitized: return ""
-												fix this https://rdrama.net/h/changelog/post/157290/rdramanet-is-proud-to-welcome-the/3852530#context

											
										
										
											2023-03-24 11:43:15 +00:00
+									sanitized = sanitized.replace('\u200e','').replace('\u200b','').replace("\ufeff", "").replace("\r","").replace("\n", "").replace("𒐫", "").replace('\u202e','')
-												improve raw title sanitization and don't check the same thing like 5 times

											
										
										
											2022-10-05 08:04:32 +00:00
+									sanitized = sanitized.strip()
-												introduce constant for post and title length

											
										
										
											2022-10-05 08:35:35 +00:00
+									return sanitized[:POST_TITLE_LENGTH_LIMIT]
-												improve raw title sanitization and don't check the same thing like 5 times

											
										
										
											2022-10-05 08:04:32 +00:00
-												sanitize, fix bug with update_flag, and update copy for low tsfriends

											
										
										
											2022-11-07 00:40:51 +00:00
+								def sanitize_raw_body(sanitized:Optional[str], is_post:bool) -> str:
-												sanitize raw bodies

											
										
										
											2022-10-05 08:16:56 +00:00
+									if not sanitized: return ""
-												sanitize: strip out html comments
they're unnecessary and never get rendered

											
										
										
											2022-10-20 23:06:55 +00:00
+									sanitized = html_comment_regex.sub('', sanitized)
-												fix this https://rdrama.net/h/changelog/post/157290/rdramanet-is-proud-to-welcome-the/3852530#context

											
										
										
											2023-03-24 11:43:15 +00:00
 									sanitized = sanitized.replace('\u200e','').replace('\u200b','').replace("\ufeff", "").replace("\r\n", "\n").replace("𒐫", "").replace('\u202e','')
-												sanitize raw bodies

											
										
										
											2022-10-05 08:16:56 +00:00
+									sanitized = sanitized.strip()
-												do this https://stupidpol.site/h/countryclub/post/79285/-/3681172#context

											
										
										
											2023-02-28 19:36:14 +00:00
+									return sanitized[:POST_BODY_LENGTH_LIMIT(g.v) if is_post else COMMENT_BODY_LENGTH_LIMIT]
-												sanitize raw bodies

											
										
										
											2022-10-05 08:16:56 +00:00
-												improve raw title sanitization and don't check the same thing like 5 times

											
										
										
											2022-10-05 08:04:32 +00:00
-												sanitize, fix bug with update_flag, and update copy for low tsfriends

											
										
										
											2022-11-07 00:40:51 +00:00
+								def sanitize_settings_text(sanitized:Optional[str], max_length:Optional[int]=None) -> str:
 									if not sanitized: return ""
 									sanitized = sanitized.replace('\u200e','').replace('\u200b','').replace("\ufeff", "").replace("\r", "").replace("\n","")
 									sanitized = sanitized.strip()
 									if max_length: sanitized = sanitized[:max_length]
 									return sanitized
-												rename handle_youtube to handle_youtube_links

											
										
										
											2023-01-25 11:17:12 +00:00
+								def handle_youtube_links(url):
-												fix youtube timestamps

											
										
										
											2023-01-23 02:06:56 +00:00
+									html = None
 									params = parse_qs(urlparse(url).query, keep_blank_values=True)
-												fix 500 error

											
										
										
											2023-01-28 10:42:45 +00:00
 									id = params.get('v')
 									if not id: return None
 									id = id[0]
-												fix youtube timestamps

											
										
										
											2023-01-23 02:06:56 +00:00
 									t = None
 									split = id.split('?t=')
 									if len(split) == 2:
 										id = split[0]
 										t = split[1]
-												fix this https://stupidpol.site/post/18459/marseycapywalking-megathread-for-bugs-and-suggestions/3645817#context

											
										
										
											2023-02-24 00:46:39 +00:00
+									id = id.split('?')[0]
-												fix youtube timestamps

											
										
										
											2023-01-23 02:06:56 +00:00
+									if yt_id_regex.fullmatch(id):
 										if not t:
 											t = params.get('t', params.get('start', [0]))[0]
-												fix youtube edge case

											
										
										
											2023-01-25 11:16:59 +00:00
+										if isinstance(t, str):
-												fix youtube timestamp edge case

											
										
										
											2023-03-12 19:07:23 +00:00
+											t = t.replace('s','').replace('S','')
-												fix youtube edge case

											
										
										
											2023-01-25 11:16:59 +00:00
+											split = t.split('m')
 											if len(split) == 2:
 												minutes = int(split[0])
 												seconds = int(split[1])
 												t = minutes*60 + seconds
-												fix youtube timestamps

											
										
										
											2023-01-23 02:06:56 +00:00
+										html = f'<lite-youtube videoid="{id}" params="autoplay=1&modestbranding=1'
 										if t:
 											html += f'&start={int(t)}'
 										html += '"></lite-youtube>'
 									return html
-												increase sanitize timeout from 5 seconds to 10 seconds

											
										
										
											2022-12-15 19:31:30 +00:00
+								@with_sigalrm_timeout(10)
-												use css for chud images instead

											
										
										
											2023-03-24 12:29:19 +00:00
+								def sanitize(sanitized, golden=True, limit_pings=0, showmore=True, count_emojis=False, snappy=False, chat=False, blackjack=None):
-												allow uploading of all types of files (using lain.la)
+ allow multiple file upload in bios and messaging admins

											
										
										
											2022-06-18 15:53:34 +00:00
+									sanitized = sanitized.strip()
-												minor improvement

											
										
										
											2023-03-26 12:57:03 +00:00
+									if not sanitized: return ''
-												allow uploading of all types of files (using lain.la)
+ allow multiple file upload in bios and messaging admins

											
										
										
											2022-06-18 15:53:34 +00:00
-												disable browser-killing exploit

											
										
										
											2023-05-05 00:07:25 +00:00
+									if "style" in sanitized and "filter" in sanitized:
 										if sanitized.count("blur(") + sanitized.count("drop-shadow(") > 5:
 											abort(400, "Too many filters!")
-												fix last commit

											
										
										
											2023-03-11 07:36:41 +00:00
+									if blackjack and execute_blackjack(g.v, None, sanitized, blackjack):
-												expand blackjack

											
										
										
											2023-02-07 03:31:49 +00:00
+										sanitized = 'g'
-												fix utm regex

											
										
										
											2022-09-23 13:23:11 +00:00
+									sanitized = utm_regex.sub('', sanitized)
 									sanitized = utm_regex2.sub('', sanitized)
-												fix this https://chapotraphouse.club/post/18459/marseycapywalking-megathread-for-bugs-and-suggestions/2178607?context=8#context

											
										
										
											2022-06-23 19:43:49 +00:00
+									sanitized = normalize_url(sanitized)
-												sfd

											
										
										
											2022-05-27 18:28:54 +00:00
+									if '```' not in sanitized and '<pre>' not in sanitized:
-												sfd

											
										
										
											2022-05-08 09:06:01 +00:00
+										sanitized = linefeeds_regex.sub(r'\1\n\n\2', sanitized)
-												mn

											
										
										
											2022-05-04 23:09:46 +00:00
-												greentext fix

											
										
										
											2022-06-19 15:22:06 +00:00
+									sanitized = greentext_regex.sub(r'\1<g>\>\2</g>', sanitized)
-												fix images again

											
										
										
											2023-03-15 02:46:54 +00:00
+									sanitized = image_regex.sub(r'\1![](\2)', sanitized)
-												mn

											
										
										
											2022-05-04 23:09:46 +00:00
+									sanitized = image_check_regex.sub(r'\1', sanitized)
-												Fix timeout in sanitize from link_fix_regex.

h/t to @official-techsupport for finding and help fixing this bug.
When given certain pathological input, `sanitize` would time out
(notably only on posts, rather than comments, perhaps due to the
longer maximum length of input). For example, using as input the
result of:

    with open("test.txt", "w") as f:
        for i in range(26):
            f.write(f":{chr(ord('a') + i)}: ")
        f.write('x' * 20_000)

We believe this to be because of some combination of the greedy
quantifiers and the negative lookahead before the match. The regex
was rewritten to (in theory) have much more linear performance.

											
										
										
											2022-06-25 05:28:43 +00:00
+									sanitized = link_fix_regex.sub(r'\1https://\2', sanitized)
-												fg

											
										
										
											2022-05-07 05:28:51 +00:00
-												LGB: disable markup commands.

											
										
										
											2022-07-20 00:07:38 +00:00
+									if FEATURES['MARKUP_COMMANDS']:
 										sanitized = command_regex.sub(command_regex_matcher, sanitized)
-												refactor comment commands

											
										
										
											2022-07-11 12:14:18 +00:00
-												disable numbered lists

											
										
										
											2023-02-01 15:59:10 +00:00
+									sanitized = numbered_list_regex.sub(r'\1\. ', sanitized)
-												fix strikethrough, v2

											
										
										
											2022-06-28 05:52:29 +00:00
+									sanitized = strikethrough_regex.sub(r'\1<del>\2</del>', sanitized)
-												fix snappy quote

											
										
										
											2023-03-12 09:30:22 +00:00
+									sanitized = markdown(sanitized)
-												sanitize: replace cuneiform ban with an overline ban

											
										
										
											2022-10-20 14:44:32 +00:00
+									# replacing zero width characters, overlines, fake colons
 									sanitized = sanitized.replace('\u200e','').replace('\u200b','').replace("\ufeff", "").replace("\u033f","").replace("\u0589", ":")
-												mn

											
										
										
											2022-05-04 23:09:46 +00:00
-												Revert "stop adding target="_blank" in the backend and move it to the frontend (to accomodate PWA users) - THANK YOU GEESE I LOVE YOU SO MUCH (#473)"

This reverts commit 88f3cd519d37e9473bb93239de4981efab688ed7.

											
										
										
											2022-11-21 17:37:38 +00:00
+									sanitized = reddit_regex.sub(r'\1<a href="https://old.reddit.com/\2" rel="nofollow noopener" target="_blank">/\2</a>', sanitized)
-												remove the need for alert flag on sanitize()

											
										
										
											2022-06-22 22:12:47 +00:00
+									sanitized = sub_regex.sub(r'\1<a href="/\2">/\2</a>', sanitized)
-												make non-jannies unable to ping more than 3 ppl

											
										
										
											2022-07-29 13:23:34 +00:00
+									v = getattr(g, 'v', None)
-												fix pinging

											
										
										
											2023-03-12 14:54:03 +00:00
+									names = set(m.group(1) for m in mention_regex.finditer(sanitized))
-												pings bypass permission

											
										
										
											2022-10-12 09:36:29 +00:00
+									if limit_pings and len(names) > limit_pings and not v.admin_level >= PERMS['POST_COMMENT_INFINITE_PINGS']: abort(406)
-												steal the parts I wanted from spidey's PR

											
										
										
											2022-08-21 17:20:09 +00:00
+									users_list = get_users(names, graceful=True)
 									users_dict = {}
 									for u in users_list:
 										users_dict[u.username.lower()] = u
 										if u.original_username:
 											users_dict[u.original_username.lower()] = u
-												add namelock award

											
										
										
											2023-05-13 04:53:14 +00:00
+										if u.prelock_username:
 											users_dict[u.prelock_username.lower()] = u
-												steal the parts I wanted from spidey's PR

											
										
										
											2022-08-21 17:20:09 +00:00
 									def replacer(m):
-												fix pinging

											
										
										
											2023-03-12 14:54:03 +00:00
+										u = users_dict.get(m.group(1).lower())
-												fix 500 error

											
										
										
											2023-04-25 16:27:00 +00:00
+										if not u or (v and u.id in v.all_twoway_blocks):
-												steal the parts I wanted from spidey's PR

											
										
										
											2022-08-21 17:20:09 +00:00
+											return m.group(0)
-												fix pinging

											
										
										
											2023-03-12 14:54:03 +00:00
+										return f'<a href="/id/{u.id}"><img loading="lazy" src="/pp/{u.id}">@{u.username}</a>'
-												steal the parts I wanted from spidey's PR

											
										
										
											2022-08-21 17:20:09 +00:00
 									sanitized = mention_regex.sub(replacer, sanitized)
-												mn

											
										
										
											2022-05-04 23:09:46 +00:00
-												link to ping groups

											
										
										
											2023-02-25 22:06:49 +00:00
+									if FEATURES['PING_GROUPS']:
-												fix repeated group pings

											
										
										
											2023-04-25 06:59:20 +00:00
+										def group_replacer(m):
 											name = m.group(1).lower()
-												add !everyone

											
										
										
											2023-03-01 05:32:19 +00:00
+											if name == 'everyone':
-												fix repeated group pings

											
										
										
											2023-04-25 06:59:20 +00:00
+												return f'<a href="/users">!{name}</a>'
 											elif g.db.get(Group, name):
 												return f'<a href="/!{name}">!{name}</a>'
-												add !everyone

											
										
										
											2023-03-01 05:32:19 +00:00
+											else:
-												fix repeated group pings

											
										
										
											2023-04-25 06:59:20 +00:00
+												return m.group(0)
 										sanitized = group_mention_regex.sub(group_replacer, sanitized)
-												link to ping groups

											
										
										
											2023-02-25 22:06:49 +00:00
-												mn

											
										
										
											2022-05-04 23:09:46 +00:00
+									soup = BeautifulSoup(sanitized, 'lxml')
 									for tag in soup.find_all("img"):
 										if tag.get("src") and not tag["src"].startswith('/pp/'):
-												add more image validation (to fix this https://chapotraphouse.club/post/18459/marseycapywalking-megathread-for-bugs-and-suggestions/2298173?context=8#context)

											
										
										
											2022-07-12 20:30:00 +00:00
+											if not is_safe_url(tag["src"]):
-												Revert "stop adding target="_blank" in the backend and move it to the frontend (to accomodate PWA users) - THANK YOU GEESE I LOVE YOU SO MUCH (#473)"

This reverts commit 88f3cd519d37e9473bb93239de4981efab688ed7.

											
										
										
											2022-11-21 17:37:38 +00:00
+												a = soup.new_tag("a", href=tag["src"], rel="nofollow noopener", target="_blank")
-												add more image validation (to fix this https://chapotraphouse.club/post/18459/marseycapywalking-megathread-for-bugs-and-suggestions/2298173?context=8#context)

											
										
										
											2022-07-12 20:30:00 +00:00
+												a.string = tag["src"]
 												tag.replace_with(a)
 												continue
-												mn

											
										
										
											2022-05-04 23:09:46 +00:00
+											tag["loading"] = "lazy"
 											tag["data-src"] = tag["src"]
-												use SITE_FULL_IMAGES in more places

											
										
										
											2023-03-19 16:28:19 +00:00
+											tag["src"] = f"{SITE_FULL_IMAGES}/i/l.webp"
-												test getting rid of ![]( on devrama

											
										
										
											2023-03-12 13:02:31 +00:00
+											tag['alt'] = tag["data-src"]
-												use .img class

											
										
										
											2023-03-12 13:13:28 +00:00
+											tag['class'] = "img"
-												kitchen sink commit, all over the place

											
										
										
											2022-07-02 10:44:05 +00:00
-												fix snappy archiving images

											
										
										
											2022-07-02 00:25:58 +00:00
+											if tag.parent.name != 'a':
-												kitchen sink commit, all over the place

											
										
										
											2022-07-02 10:44:05 +00:00
+												a = soup.new_tag("a", href=tag["data-src"])
 												if not is_site_url(a["href"]):
-												remove redundant "noreferrer"

											
										
										
											2022-10-29 21:46:30 +00:00
+													a["rel"] = "nofollow noopener"
-												Revert "stop adding target="_blank" in the backend and move it to the frontend (to accomodate PWA users) - THANK YOU GEESE I LOVE YOU SO MUCH (#473)"

This reverts commit 88f3cd519d37e9473bb93239de4981efab688ed7.

											
										
										
											2022-11-21 17:37:38 +00:00
+													a["target"] = "_blank"
-												fix snappy archiving images

											
										
										
											2022-07-02 00:25:58 +00:00
+												tag = tag.replace_with(a)
 												a.append(tag)
-												embed rework

											
										
										
											2022-06-27 01:00:45 +00:00
-												better 200w behavior

											
										
										
											2023-01-01 11:30:33 +00:00
+											tag["data-src"] = tag["data-src"].replace('/giphy.webp', '/200w.webp')
-. unify all link processing logic in one place
2. fix this https://rdrama.net/post/168836/texas-shooter-identified-as-mauricio-garcia/4113391#context
3. fix this https://rdrama.net/post/168836/texas-shooter-identified-as-mauricio-garcia/4142945#context

											
										
										
											2023-05-12 19:12:02 +00:00
+									sanitized = str(soup).replace('<html><body>','').replace('</body></html>','')
-												fix exceptions in sanitize leaving SIGALRM on

											
										
										
											2022-07-05 22:11:45 +00:00
-												mn

											
										
										
											2022-05-04 23:09:46 +00:00
+									sanitized = spoiler_regex.sub(r'<spoiler>\1</spoiler>', sanitized)
-												fix exceptions in sanitize leaving SIGALRM on

											
										
										
											2022-07-05 22:11:45 +00:00
-												count non-marsey emojis too (to be used for emoji picker prioritization)

											
										
										
											2023-03-19 08:33:04 +00:00
+									emojis_used = set()
-												mn

											
										
										
											2022-05-04 23:09:46 +00:00
 									emojis = list(emoji_regex.finditer(sanitized))
-												minor sanitize refactor

											
										
										
											2022-09-16 16:30:34 +00:00
+									if len(emojis) > 20: golden = False
-												mn

											
										
										
											2022-05-04 23:09:46 +00:00
 									captured = []
 									for i in emojis:
 										if i.group(0) in captured: continue
 										captured.append(i.group(0))
 										old = i.group(0)
-												fix this https://rdrama.net/h/vampire/post/166279/this-is-mavis/4056251#context

											
										
										
											2023-04-27 18:06:44 +00:00
+										if 'marseylong1' in old or 'marseylong2' in old or 'marseylongcockandballs' in old or 'marseyllama1' in old or 'marseyllama2' in old:
-												add line break

											
										
										
											2023-04-23 13:15:29 +00:00
+											new = old.lower().replace(">", " class='mb-0'>")
-												mn

											
										
										
											2022-05-04 23:09:46 +00:00
+										else: new = old.lower()
-												count non-marsey emojis too (to be used for emoji picker prioritization)

											
										
										
											2023-03-19 08:33:04 +00:00
+										new = render_emoji(new, emoji_regex2, golden, emojis_used, True)
-												mn

											
										
										
											2022-05-04 23:09:46 +00:00
 										sanitized = sanitized.replace(old, new)
 									emojis = list(emoji_regex2.finditer(sanitized))
-												minor sanitize refactor

											
										
										
											2022-09-16 16:30:34 +00:00
+									if len(emojis) > 20: golden = False
-												mn

											
										
										
											2022-05-04 23:09:46 +00:00
-												count non-marsey emojis too (to be used for emoji picker prioritization)

											
										
										
											2023-03-19 08:33:04 +00:00
+									sanitized = render_emoji(sanitized, emoji_regex2, golden, emojis_used)
-												mn

											
										
										
											2022-05-04 23:09:46 +00:00
-												fixing https://rdrama.net/post/69817/tfw-you-will-never-troll-harder/1961118?context=8#context

											
										
										
											2022-05-22 10:20:11 +00:00
+									sanitized = sanitized.replace('&amp;','&')
-												mn

											
										
										
											2022-05-04 23:09:46 +00:00
 									captured = []
 									for i in youtube_regex.finditer(sanitized):
 										if i.group(0) in captured: continue
 										captured.append(i.group(0))
-												fix this https://stupidpol.site/post/145206/larry-feinberg-cancelled-seinfeld-ai-banned/3546391#context

											
										
										
											2023-02-07 01:12:14 +00:00
+										html = handle_youtube_links(i.group(2))
-												fix youtube timestamps

											
										
										
											2023-01-23 02:06:56 +00:00
+										if html:
-												fix this https://stupidpol.site/post/145206/larry-feinberg-cancelled-seinfeld-ai-banned/3546391#context

											
										
										
											2023-02-07 01:12:14 +00:00
+											sanitized = sanitized.replace(i.group(0), i.group(1) + html)
-												mn

											
										
										
											2022-05-04 23:09:46 +00:00
-												resizable videos

											
										
										
											2022-12-10 19:12:14 +00:00
+									sanitized = video_sub_regex.sub(r'\1<p class="resizable"><video controls preload="none" src="\2"></video></p>', sanitized)
-												revert an earlier change for video and audio files in sanitize()

											
										
										
											2022-10-01 17:42:34 +00:00
+									sanitized = audio_sub_regex.sub(r'\1<audio controls preload="none" src="\2"></audio>', sanitized)
-												mn

											
										
										
											2022-05-04 23:09:46 +00:00
-												count non-marsey emojis too (to be used for emoji picker prioritization)

											
										
										
											2023-03-19 08:33:04 +00:00
+									if count_emojis:
 										for emoji in g.db.query(Emoji).filter(Emoji.submitter_id==None, Emoji.name.in_(emojis_used)).all():
 											emoji.count += 1
 											g.db.add(emoji)
-												mn

											
										
										
											2022-05-04 23:09:46 +00:00
-												fds

											
										
										
											2022-05-15 08:45:57 +00:00
+									sanitized = sanitized.replace('<p></p>', '')
-												mn

											
										
										
											2022-05-04 23:09:46 +00:00
-												fix this https://rdrama.net/h/changelog/post/157290/rdramanet-is-proud-to-welcome-the/3852376#context

											
										
										
											2023-03-24 11:31:12 +00:00
+									if g.v and g.v.agendaposter:
 										allowed_css_properties = allowed_styles
 									else:
 										allowed_css_properties = allowed_styles + ["filter"]
 									css_sanitizer = CSSSanitizer(allowed_css_properties=allowed_css_properties)
-												mn

											
										
										
											2022-05-04 23:09:46 +00:00
+									sanitized = bleach.Cleaner(tags=allowed_tags,
 																attributes=allowed_attributes,
 																protocols=['http', 'https'],
-												Upgrade bleach to 5.0.0.

											
										
										
											2022-05-25 00:27:41 +00:00
+																css_sanitizer=css_sanitizer,
-												fix exceptions in sanitize leaving SIGALRM on

											
										
										
											2022-07-05 22:11:45 +00:00
+																filters=[partial(LinkifyFilter, skip_tags=["pre"],
-. unify all link processing logic in one place
2. fix this https://rdrama.net/post/168836/texas-shooter-identified-as-mauricio-garcia/4113391#context
3. fix this https://rdrama.net/post/168836/texas-shooter-identified-as-mauricio-garcia/4142945#context

											
										
										
											2023-05-12 19:12:02 +00:00
+																	parse_email=False, url_re=url_re)]
-												mn

											
										
										
											2022-05-04 23:09:46 +00:00
+																).clean(sanitized)
-. unify all link processing logic in one place
2. fix this https://rdrama.net/post/168836/texas-shooter-identified-as-mauricio-garcia/4113391#context
3. fix this https://rdrama.net/post/168836/texas-shooter-identified-as-mauricio-garcia/4142945#context

											
										
										
											2023-05-12 19:12:02 +00:00
+									#doing this here cuz of the linkifyfilter right above it (therefore unifying all link processing logic)
-												mn

											
										
										
											2022-05-04 23:09:46 +00:00
+									soup = BeautifulSoup(sanitized, 'lxml')
 									links = soup.find_all("a")
-												instead of error message when posting a banned domain, unlinkify instead

											
										
										
											2023-05-13 02:53:51 +00:00
+									banned_domains = [x.domain for x in g.db.query(BannedDomain.domain).all()]
-												mn

											
										
										
											2022-05-04 23:09:46 +00:00
 									for link in links:
-. unify all link processing logic in one place
2. fix this https://rdrama.net/post/168836/texas-shooter-identified-as-mauricio-garcia/4113391#context
3. fix this https://rdrama.net/post/168836/texas-shooter-identified-as-mauricio-garcia/4142945#context

											
										
										
											2023-05-12 19:12:02 +00:00
+										#remove empty links
 										if not link.contents or not str(link.contents[0]).strip():
 											link.extract()
 											continue
-												mn

											
										
										
											2022-05-04 23:09:46 +00:00
+										href = link.get("href")
 										if not href: continue
-												instead of error message when posting a banned domain, unlinkify instead

											
										
										
											2023-05-13 02:53:51 +00:00
+										domain = tldextract.extract(href).registered_domain
-. unify all link processing logic in one place
2. fix this https://rdrama.net/post/168836/texas-shooter-identified-as-mauricio-garcia/4113391#context
3. fix this https://rdrama.net/post/168836/texas-shooter-identified-as-mauricio-garcia/4142945#context

											
										
										
											2023-05-12 19:12:02 +00:00
-												instead of error message when posting a banned domain, unlinkify instead

											
										
										
											2023-05-13 02:53:51 +00:00
+										def unlinkfy():
-												fix this https://rdrama.net/@TheDunceonMaster/wall/comment/4142982#context

											
										
										
											2023-05-12 19:30:47 +00:00
+											link.string = href
 											del link["href"]
-												instead of error message when posting a banned domain, unlinkify instead

											
										
										
											2023-05-13 02:53:51 +00:00
+										#\ in href right after / makes most browsers ditch site hostname and allows for a host injection bypassing the check, see <a href="/\google.com">cool</a>
 										if "\\" in href:
 											unlinkfy()
 											continue
-. unify all link processing logic in one place
2. fix this https://rdrama.net/post/168836/texas-shooter-identified-as-mauricio-garcia/4113391#context
3. fix this https://rdrama.net/post/168836/texas-shooter-identified-as-mauricio-garcia/4142945#context

											
										
										
											2023-05-12 19:12:02 +00:00
-												fix this https://rdrama.net/@TheDunceonMaster/wall/comment/4142982#context

											
										
										
											2023-05-12 19:30:47 +00:00
+										#don't allow something like this https://rdrama.net/post/78376/reminder-of-the-fact-that-our/2150032#context
 										if domain and not allowed_domain_regex.fullmatch(domain):
-												instead of error message when posting a banned domain, unlinkify instead

											
										
										
											2023-05-13 02:53:51 +00:00
+											unlinkfy()
 											continue
 										#check for banned domain
 										combined = (domain + urlparse(href).path).lower()
 										if any((combined.startswith(x) for x in banned_domains)):
 											unlinkfy()
 											continue
 										#don't allow something like this [https://rԁrama.net/leaderboard](https://iplogger.org/1fRKk7)
 										if not snappy and tldextract.extract(str(link.string)).registered_domain:
-												fix links linkfied by linkfyfilter getting undone by my code

											
										
										
											2023-05-13 03:55:07 +00:00
+											link.string = href
-. unify all link processing logic in one place
2. fix this https://rdrama.net/post/168836/texas-shooter-identified-as-mauricio-garcia/4113391#context
3. fix this https://rdrama.net/post/168836/texas-shooter-identified-as-mauricio-garcia/4142945#context

											
										
										
											2023-05-12 19:12:02 +00:00
 										#insert target="_blank" and ref="nofollower noopener" for external link
 										if not href.startswith('/') and not href.startswith(f'{SITE_FULL}/'):
 											link["target"] = "_blank"
 											link["rel"] = "nofollow noopener"
 									sanitized = str(soup).replace('<html><body>','').replace('</body></html>','')
-												mn

											
										
										
											2022-05-04 23:09:46 +00:00
-												dont use abort in chat

											
										
										
											2023-01-27 07:07:58 +00:00
+									def error(error):
-												fix chat

											
										
										
											2023-02-08 02:14:54 +00:00
+										if chat:
-												dont use abort in chat

											
										
										
											2023-01-27 07:07:58 +00:00
+											return error, 403
 										else:
 											abort(403, error)
-												add 'blog' tld (#108)

https://scottaaronson.blog/ is a thing, for example. We must support the thing.

Add .blog as an approved TLD.

Co-authored-by: official-techsupport <official_techsupport@protonmail.com>
Reviewed-on: https://fsdfsd.net/rDrama/rDrama/pulls/108
Co-authored-by: official-techsupport <official-techsupport@noreply.fsdfsd.net>
Co-committed-by: official-techsupport <official-techsupport@noreply.fsdfsd.net>

											
										
										
											2023-02-01 23:20:08 +00:00
-												remove IGNORE_DOMAIN_BAN perm

											
										
										
											2023-05-12 19:21:50 +00:00
 									if discord_username_regex.match(sanitized):
 										return error("Stop grooming!")
-												ban discord ids

											
										
										
											2023-01-23 07:38:16 +00:00
-												expand blackjack

											
										
										
											2023-02-07 03:31:49 +00:00
+									if '<pre>' not in sanitized and blackjack != "rules":
-												fix issue with code blocks

											
										
										
											2022-06-30 23:01:10 +00:00
+										sanitized = sanitized.replace('\n','')
-												add "show more..." button

											
										
										
											2022-06-29 00:55:44 +00:00
-												fix last commit

											
										
										
											2023-03-11 07:36:41 +00:00
+									if showmore:
-												showmore on too many newlines (works now) (#90)

At this point I think we should stop abusing regexes to implement the showmore logic.

I also reduced the character constant to 3000/2500 and increased the line constant to 20 but I don't have any strong feelings about them.

Co-authored-by: official-techsupport <official_techsupport@protonmail.com>
Reviewed-on: https://fsdfsd.net/rDrama/rDrama/pulls/90
Co-authored-by: official-techsupport <official-techsupport@noreply.fsdfsd.net>
Co-committed-by: official-techsupport <official-techsupport@noreply.fsdfsd.net>

											
										
										
											2023-01-22 23:27:24 +00:00
+										# Insert a show more button if the text is too long or has too many paragraphs
-												fix an edge case in showmore (when many long paragraphs) (#93)

$subj

Co-authored-by: official-techsupport <official_techsupport@protonmail.com>
Reviewed-on: https://fsdfsd.net/rDrama/rDrama/pulls/93
Co-authored-by: official-techsupport <official-techsupport@noreply.fsdfsd.net>
Co-committed-by: official-techsupport <official-techsupport@noreply.fsdfsd.net>

											
										
										
											2023-01-24 03:56:14 +00:00
+										CHARLIMIT = 3000
-												showmore on too many newlines (works now) (#90)

At this point I think we should stop abusing regexes to implement the showmore logic.

I also reduced the character constant to 3000/2500 and increased the line constant to 20 but I don't have any strong feelings about them.

Co-authored-by: official-techsupport <official_techsupport@protonmail.com>
Reviewed-on: https://fsdfsd.net/rDrama/rDrama/pulls/90
Co-authored-by: official-techsupport <official-techsupport@noreply.fsdfsd.net>
Co-committed-by: official-techsupport <official-techsupport@noreply.fsdfsd.net>

											
										
										
											2023-01-22 23:27:24 +00:00
+										pos = 0
 										for _ in range(20):
-												Revert "improve the showmore algo"

This reverts commit 9529cbbd616bf83ce5d2075e2f36fe068d08c30f.

											
										
										
											2023-02-24 07:29:46 +00:00
+											pos = sanitized.find('</p>', pos + 4)
-												showmore on too many newlines (works now) (#90)

At this point I think we should stop abusing regexes to implement the showmore logic.

I also reduced the character constant to 3000/2500 and increased the line constant to 20 but I don't have any strong feelings about them.

Co-authored-by: official-techsupport <official_techsupport@protonmail.com>
Reviewed-on: https://fsdfsd.net/rDrama/rDrama/pulls/90
Co-authored-by: official-techsupport <official-techsupport@noreply.fsdfsd.net>
Co-committed-by: official-techsupport <official-techsupport@noreply.fsdfsd.net>

											
										
										
											2023-01-22 23:27:24 +00:00
+											if pos < 0:
 												break
-												fix an edge case in showmore (when many long paragraphs) (#93)

$subj

Co-authored-by: official-techsupport <official_techsupport@protonmail.com>
Reviewed-on: https://fsdfsd.net/rDrama/rDrama/pulls/93
Co-authored-by: official-techsupport <official-techsupport@noreply.fsdfsd.net>
Co-committed-by: official-techsupport <official-techsupport@noreply.fsdfsd.net>

											
										
										
											2023-01-24 03:56:14 +00:00
+										if (pos < 0 and len(sanitized) > CHARLIMIT) or pos > CHARLIMIT:
 											pos = CHARLIMIT - 500
-												showmore on too many newlines (works now) (#90)

At this point I think we should stop abusing regexes to implement the showmore logic.

I also reduced the character constant to 3000/2500 and increased the line constant to 20 but I don't have any strong feelings about them.

Co-authored-by: official-techsupport <official_techsupport@protonmail.com>
Reviewed-on: https://fsdfsd.net/rDrama/rDrama/pulls/90
Co-authored-by: official-techsupport <official-techsupport@noreply.fsdfsd.net>
Co-committed-by: official-techsupport <official-techsupport@noreply.fsdfsd.net>

											
										
										
											2023-01-22 23:27:24 +00:00
+										if pos >= 0:
-												fix showmore

											
										
										
											2023-03-10 23:30:42 +00:00
+											sanitized = (sanitized[:pos] + showmore_regex.sub(r'\1<p><button class="showmore">SHOW MORE</button></p><d class="d-none">\2</d>', sanitized[pos:], count=1))
-												mn

											
										
										
											2022-05-04 23:09:46 +00:00
-												strip sanitized text

											
										
										
											2022-07-02 10:12:52 +00:00
+									return sanitized.strip()
-												mn

											
										
										
											2022-05-04 23:09:46 +00:00
 								def allowed_attributes_emojis(tag, name, value):
 									if tag == 'img':
-												crgd is a king

											
										
										
											2022-05-25 18:29:22 +00:00
+										if name == 'src' and value.startswith('/') and '\\' not in value: return True
-												mn

											
										
										
											2022-05-04 23:09:46 +00:00
+										if name == 'loading' and value == 'lazy': return True
 										if name == 'data-bs-toggle' and value == 'tooltip': return True
-												use sets instead of lists in some statements

											
										
										
											2022-11-26 04:52:47 +00:00
+										if name in {'g','glow'} and not value: return True
 										if name in {'alt','title'}: return True
-												pls review

											
										
										
											2022-05-17 19:58:41 +00:00
 									if tag == 'span':
 										if name == 'data-bs-toggle' and value == 'tooltip': return True
 										if name == 'title': return True
 										if name == 'alt': return True
-												mn

											
										
										
											2022-05-04 23:09:46 +00:00
+									return False
-												fix exceptions in sanitize leaving SIGALRM on

											
										
										
											2022-07-05 22:11:45 +00:00
+								@with_sigalrm_timeout(1)
-												fix 2 chud phrases

											
										
										
											2023-03-23 15:36:28 +00:00
+								def filter_emojis_only(title, golden=True, count_emojis=False, graceful=False, strip=True):
-												mn

											
										
										
											2022-05-04 23:09:46 +00:00
-												dont strip poll options

											
										
										
											2023-03-19 17:53:33 +00:00
+									title = title.replace('‎','').replace('','').replace("\ufeff", "").replace("𒐪","").replace("\n", "").replace("\r", "").replace("\t", "").replace('<','&lt;').replace('>','&gt;').replace("﷽","")
-												mn

											
										
										
											2022-05-04 23:09:46 +00:00
-												count non-marsey emojis too (to be used for emoji picker prioritization)

											
										
										
											2023-03-19 08:33:04 +00:00
+									emojis_used = set()
-												make marsey counter work everywhere (not just comments)

											
										
										
											2022-06-13 18:05:24 +00:00
-												count non-marsey emojis too (to be used for emoji picker prioritization)

											
										
										
											2023-03-19 08:33:04 +00:00
+									title = render_emoji(title, emoji_regex3, golden, emojis_used)
-												make marsey counter work everywhere (not just comments)

											
										
										
											2022-06-13 18:05:24 +00:00
-												count non-marsey emojis too (to be used for emoji picker prioritization)

											
										
										
											2023-03-19 08:33:04 +00:00
+									if count_emojis:
 										for emoji in g.db.query(Emoji).filter(Emoji.submitter_id==None, Emoji.name.in_(emojis_used)).all():
 											emoji.count += 1
 											g.db.add(emoji)
-												mn

											
										
										
											2022-05-04 23:09:46 +00:00
-												fixed strikethrough

											
										
										
											2022-06-28 05:41:21 +00:00
+									title = strikethrough_regex.sub(r'\1<del>\2</del>', title)
-												mn

											
										
										
											2022-05-04 23:09:46 +00:00
-												dont strip poll options

											
										
										
											2023-03-19 17:53:33 +00:00
+									title = bleach.clean(title, tags=['img','del','span'], attributes=allowed_attributes_emojis, protocols=['http','https']).replace('\n','')
 									if strip:
 										title = title.strip()
-												mn

											
										
										
											2022-05-04 23:09:46 +00:00
-												introduce constant for post and title length

											
										
										
											2022-10-05 08:35:35 +00:00
+									if len(title) > POST_TITLE_HTML_LENGTH_LIMIT and not graceful: abort(400)
 									else: return title
-												Sanitize: modularize normalize_url, fix streamable.

Originally prompted by https://rdrama.net/post/18459/-/1984609 which
noticed that streamable.com/e/ links as posts would have another e/
added to them. This was in spite of logic in posts.py api_is_repost
and submit_post designed to specifically counteract this.
Proximal cause was a copypasta'd url.replace(...) chain which
caused the mistake before the streamable-specific logic had a chance
to avoid making it.

Solution: remove the streamable replacement from the chained statement
and create `helpers.normalize_url(url)` to get rid of the copypasta.

											
										
										
											2022-05-25 08:43:16 +00:00
-												re-refactor normalize_url

											
										
										
											2022-06-10 20:02:15 +00:00
+								def normalize_url(url):
-												fix reddit domain replacement

											
										
										
											2022-07-04 03:08:33 +00:00
+									url = reddit_domain_regex.sub(r'\1https://old.reddit.com/\3/', url)
-												re-refactor normalize_url

											
										
										
											2022-06-10 20:02:15 +00:00
-												Revert "make the "https://" optional to embed a youtube video (I rly hope i dont regret this)"

This reverts commit 5ad46f3e538e3c42b508e853d27da78ef374c8f2.

											
										
										
											2023-04-25 08:01:51 +00:00
+									url = url.replace("https://youtu.be/", "https://youtube.com/watch?v=") \
 											 .replace("https://music.youtube.com/watch?v=", "https://youtube.com/watch?v=") \
 											 .replace("https://www.youtube.com", "https://youtube.com") \
 											 .replace("https://m.youtube.com", "https://youtube.com") \
 											 .replace("https://youtube.com/shorts/", "https://youtube.com/watch?v=") \
 											 .replace("https://youtube.com/v/", "https://youtube.com/watch?v=") \
-												refactor normalizing urls at runtime (I put the function in comment.py cuz there were weird import errors that i didnt wanna fix)

											
										
										
											2022-06-23 15:47:57 +00:00
+											 .replace("https://mobile.twitter.com", "https://twitter.com") \
 											 .replace("https://m.facebook.com", "https://facebook.com") \
 											 .replace("https://m.wikipedia.org", "https://wikipedia.org") \
 											 .replace("https://www.twitter.com", "https://twitter.com") \
 											 .replace("https://www.instagram.com", "https://instagram.com") \
 											 .replace("https://www.tiktok.com", "https://tiktok.com") \
 											 .replace("https://www.streamable.com", "https://streamable.com") \
-												replace streamable links with full-size version

											
										
										
											2022-06-10 14:35:09 +00:00
+											 .replace("https://streamable.com/", "https://streamable.com/e/") \
-												replace search.marsey.cat with camas.unddit.com

											
										
										
											2022-07-15 13:00:51 +00:00
+											 .replace("https://streamable.com/e/e/", "https://streamable.com/e/") \
-												replace "https://imgur.com/" with "https://i.imgur.com/" for the sake of mobilecels

											
										
										
											2022-08-13 05:06:53 +00:00
+											 .replace("https://search.marsey.cat/#", "https://camas.unddit.com/#") \
-												nitter shit

											
										
										
											2022-09-29 05:36:10 +00:00
+											 .replace("https://imgur.com/", "https://i.imgur.com/") \
 											 .replace("https://nitter.net/", "https://twitter.com/") \
 											 .replace("https://nitter.42l.fr/", "https://twitter.com/") \
-												replace giphy.gif with giphy.webp

											
										
										
											2022-12-25 00:54:47 +00:00
+											 .replace("https://nitter.lacontrevoie.fr/", "https://twitter.com/") \
-												patch exploit that allowed u to post banned domains

											
										
										
											2023-05-05 00:17:57 +00:00
+											 .replace("/giphy.gif", "/giphy.webp") \
-												Sanitize: modularize normalize_url, fix streamable.

Originally prompted by https://rdrama.net/post/18459/-/1984609 which
noticed that streamable.com/e/ links as posts would have another e/
added to them. This was in spite of logic in posts.py api_is_repost
and submit_post designed to specifically counteract this.
Proximal cause was a copypasta'd url.replace(...) chain which
caused the mistake before the streamable-specific logic had a chance
to avoid making it.

Solution: remove the streamable replacement from the chained statement
and create `helpers.normalize_url(url)` to get rid of the copypasta.

											
										
										
											2022-05-25 08:43:16 +00:00
-												constantify image extensions and fix imgur .gif bug

											
										
										
											2022-11-05 21:01:23 +00:00
+									url = imgur_regex.sub(r'\1_d.webp?maxwidth=9999&fidelity=grand', url)
-												small image embed improvements

											
										
										
											2022-06-11 12:21:59 +00:00
+									url = giphy_regex.sub(r'\1.webp', url)
-												fix this https://rdrama.net/post/167983/what-happened-to-jon-stewart-gets/4096939#context

											
										
										
											2023-05-05 00:23:54 +00:00
+									url = unquote(url)
-												Sanitize: modularize normalize_url, fix streamable.

Originally prompted by https://rdrama.net/post/18459/-/1984609 which
noticed that streamable.com/e/ links as posts would have another e/
added to them. This was in spite of logic in posts.py api_is_repost
and submit_post designed to specifically counteract this.
Proximal cause was a copypasta'd url.replace(...) chain which
caused the mistake before the streamable-specific logic had a chance
to avoid making it.

Solution: remove the streamable replacement from the chained statement
and create `helpers.normalize_url(url)` to get rid of the copypasta.

											
										
										
											2022-05-25 08:43:16 +00:00
-												remove weird trailing tabs

											
										
										
											2022-06-11 09:56:16 +00:00
+									return url
-												Check URI approved embed in all CSS contexts.

											
										
										
											2022-08-05 17:09:41 +00:00
 								def validate_css(css):
 									if '@import' in css:
-												include "css" in error

											
										
										
											2023-03-11 21:55:40 +00:00
+										return False, "CSS @import statements are not allowed!"
-												Check URI approved embed in all CSS contexts.

											
										
										
											2022-08-05 17:09:41 +00:00
-												actually disable comments in css

											
										
										
											2023-02-18 20:00:39 +00:00
+									if '/*' in css:
-												include "css" in error

											
										
										
											2023-03-11 21:55:40 +00:00
+										return False, "CSS comments are not allowed!"
-												disallow comments in css

											
										
										
											2023-02-18 19:49:11 +00:00
-												Check URI approved embed in all CSS contexts.

											
										
										
											2022-08-05 17:09:41 +00:00
+									for i in css_url_regex.finditer(css):
 										url = i.group(1)
 										if not is_safe_url(url):
 											domain = tldextract.extract(url).registered_domain
 											return False, f"The domain '{domain}' is not allowed, please use one of these domains\n\n{approved_embed_hosts}."
 									return True, ""
-												much better chud award logic

											
										
										
											2023-03-22 21:39:25 +00:00
-												refactor torturing and also replace "my" and "mine"

											
										
										
											2023-03-25 18:18:48 +00:00
 								def torture_ap(string, username):
 									if not string: return string
 									for k, l in AJ_REPLACEMENTS.items():
 										string = string.replace(k, l)
-												fix internal server error for chudded ppl whose name starts with a number (like 911roofer) bc of invalid regex group reference (lol)

											
										
										
											2023-04-24 06:58:31 +00:00
+									string = torture_regex.sub(rf'\1@{username}\3', string)
 									string = torture_regex2.sub(rf'\1@{username} is\3', string)
 									string = torture_regex3.sub(rf"\1@{username}'s\3", string)
-												refactor torturing and also replace "my" and "mine"

											
										
										
											2023-03-25 18:18:48 +00:00
+									return string
-												much better chud award logic

											
										
										
											2023-03-22 21:39:25 +00:00
+								def complies_with_chud(obj):
-												refactor torturing and also replace "my" and "mine"

											
										
										
											2023-03-25 18:18:48 +00:00
+									#check for cases where u should leave
-												dedup chud-checking logic

											
										
										
											2023-03-23 12:50:01 +00:00
+									if not obj.author.agendaposter: return True
 									if obj.author.marseyawarded: return True
 									if isinstance(obj, Submission):
 										if obj.id in ADMIGGER_THREADS: return True
 										if obj.sub == "chudrama": return True
 									elif obj.parent_submission:
 										if obj.parent_submission in ADMIGGER_THREADS: return True
 										if obj.post.sub == "chudrama": return True
-												fix "I stand with israel" and "I say this as a feminist ally" again lol

											
										
										
											2023-03-25 22:18:23 +00:00
+									#perserve old body_html to be used in checking for chud phrase
 									old_body_html = obj.body_html
-												fix bug where including the agendaposter phrase in the title exempted u from torture_ap

											
										
										
											2023-03-23 19:03:02 +00:00
-												refactor torturing and also replace "my" and "mine"

											
										
										
											2023-03-25 18:18:48 +00:00
+									#torture body_html
-												fix using emojis in polls while chudded

											
										
										
											2023-04-27 14:12:56 +00:00
+									if obj.body_html and '<p>&amp;&amp;' not in obj.body_html and '<p>$$' not in obj.body_html and '<p>##' not in obj.body_html:
-												minor fix

											
										
										
											2023-03-26 12:27:40 +00:00
+										soup = BeautifulSoup(obj.body_html, 'lxml')
 										tags = soup.html.body.find_all(lambda tag: tag.name not in {'blockquote','codeblock','pre'} and tag.string, recursive=False)
 										for tag in tags:
 											tag.string.replace_with(torture_ap(tag.string, obj.author.username))
 										obj.body_html = str(soup).replace('<html><body>','').replace('</body></html>','')
-												fix last commit

											
										
										
											2023-03-23 15:41:57 +00:00
-												refactor torturing and also replace "my" and "mine"

											
										
										
											2023-03-25 18:18:48 +00:00
+									#torture title_html and check for agendaposter_phrase in plain title and leave if it's there
 									if isinstance(obj, Submission):
 										obj.title_html = torture_ap(obj.title_html, obj.author.username)
 										if obj.author.agendaposter_phrase in obj.title.lower():
 											return True
-												print-debug weird 500 error

											
										
										
											2023-03-23 20:19:29 +00:00
-												refactor torturing and also replace "my" and "mine"

											
										
										
											2023-03-25 18:18:48 +00:00
+									#check for agendaposter_phrase in body_html
-												minor fix

											
										
										
											2023-03-26 12:28:32 +00:00
+									if old_body_html:
 										excluded_tags = {'del','sub','sup','marquee','spoiler','lite-youtube','video','audio'}
 										soup = BeautifulSoup(old_body_html, 'lxml')
 										tags = soup.html.body.find_all(lambda tag: tag.name not in excluded_tags and not tag.attrs, recursive=False)
 										for tag in tags:
 											for text in tag.find_all(text=True, recursive=False):
 												if obj.author.agendaposter_phrase in text.lower():
 													return True
-												much better chud award logic

											
										
										
											2023-03-22 21:39:25 +00:00
 									return False