# Markdown/HTML sanitization helpers for user-submitted content.
#
# NOTE(review): this file appears to have been damaged in transit — it was
# collapsed onto a handful of physical lines and several HTML string literals
# look truncated (e.g. the f-strings passed to re.sub() end at "(?", and
# `htmlsource = f''` is empty where an <iframe> template presumably lived).
# The code below is a re-indentation only: every code token and every runtime
# string is reproduced byte-for-byte, including the truncated ones. Restore
# the original literals from the upstream repository before relying on this.

import bleach
from bs4 import BeautifulSoup
from bleach.linkifier import LinkifyFilter
from functools import partial
from .get import *
from os import path, environ
import re
from mistletoe import markdown
from json import loads, dump
from random import random, choice
import signal
import time
from urllib.parse import ParseResult, urlunparse, urlparse

# HTML tags allowed through the sanitizer. `tags` is an alias of the same
# list object (both names are bound to it).
allowed_tags = tags = ['b', 'blockquote', 'br', 'code', 'del', 'em', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'hr', 'i', 'li', 'ol', 'p', 'pre', 'strong', 'sup', 'table', 'tbody', 'th', 'thead', 'td', 'tr', 'ul', 'marquee', 'a', 'img', 'span', 'ruby', 'rp', 'rt', ]

# Same whitelist minus 'img', used when images must be suppressed.
no_images = ['b', 'blockquote', 'br', 'code', 'del', 'em', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'hr', 'i', 'li', 'ol', 'p', 'pre', 'strong', 'sup', 'table', 'tbody', 'th', 'thead', 'td', 'tr', 'ul', 'marquee', 'a', 'span', 'ruby', 'rp', 'rt', ]


def sanitize_marquee(tag, name, value):
    # bleach attribute-filter callback for <marquee>: allow the generic
    # whitelist plus marquee-specific attributes; height/width only when
    # they parse as an integer pixel value in (0, 250].
    if name in allowed_attributes['*'] or name in ['direction', 'behavior', 'scrollamount']:
        return True
    if name in {'height', 'width'}:
        try:
            value = int(value.replace('px', ''))
        # NOTE(review): bare except — should be `except ValueError:`.
        except:
            return False
        if 0 < value <= 250:
            return True
    return False


# Per-tag attribute whitelist; '*' applies to every allowed tag.
allowed_attributes = {'*': ['href', 'style', 'src', 'class', 'title', 'loading'], 'marquee': sanitize_marquee}

allowed_protocols = ['http', 'https']

allowed_styles = ['color', 'background-color', 'font-weight', 'text-align']


def handler(signum, frame):
    # SIGALRM handler: aborts sanitization that runs longer than the
    # 1-second alarm set in sanitize() (guards against pathological input
    # such as regex blowups). NOTE(review): signal-based timeouts only work
    # on the main thread — confirm this is never called from a worker.
    print("Forever is over!")
    raise Exception("end of time")


def sanitize(sanitized, noimages=False, alert=False, comment=False, edit=False):
    """Render user-submitted Markdown to sanitized HTML.

    Renders with mistletoe, normalizes a fixed set of link hosts, resolves
    @mentions, whitelists tags/attributes via bleach, rewrites <img>/<a>
    elements, expands :emoji: codes, and embeds recognized video links.

    sanitized -- raw Markdown text from the user (untrusted input).
    noimages  -- strip <img> from the whitelist and use text-only mentions.
    alert     -- mention-notification mode: only resolve @mentions, skip
                 the subreddit/user-link shorthand pass.
    comment   -- count Marsey emoji usage into the database.
    edit      -- suppresses the random 'golden' emoji roll; also forced on
                 when more than 20 emoji are present.
    The whole pass is wrapped in a 1-second SIGALRM timeout.
    """
    signal.signal(signal.SIGALRM, handler)
    signal.alarm(1)

    sanitized = markdown(sanitized)
    sanitized = strikethrough_regex.sub(r'\1', sanitized)

    # Strip BOM/zero-width junk and canonicalize assorted mobile/shortened
    # video & social URLs to their desktop equivalents.
    # NOTE(review): some .replace() pairs below look corrupted by the same
    # extraction that truncated the HTML literals (e.g. replace("","") is a
    # no-op, and replace('&','&') further down) — verify against upstream.
    sanitized = sanitized.replace("\ufeff", "").replace("𒐪","").replace("","").replace('‎','').replace("https://youtu.be/", "https://youtube.com/watch?v=").replace("https://music.youtube.com/watch?v=", "https://youtube.com/watch?v=").replace("https://streamable.com/", "https://streamable.com/e/").replace("https://youtube.com/shorts/", "https://youtube.com/watch?v=").replace("https://mobile.twitter", "https://twitter").replace("https://m.facebook", "https://facebook").replace("m.wikipedia.org", "wikipedia.org").replace("https://m.youtube", "https://youtube").replace("https://www.youtube", "https://youtube").replace("old.reddit.com/gallery", "reddit.com/gallery")

    # youtu.be links carry "?t=", but once rewritten to /watch?v= the time
    # offset must become an extra query parameter.
    if "https://youtube.com/watch?v=" in sanitized:
        sanitized = sanitized.replace("?t=", "&t=")

    # Funnel every reddit mirror through old.reddit.com.
    for rd in ["://reddit.com", "://new.reddit.com", "://www.reddit.com", "://redd.it", "://libredd.it"]:
        sanitized = sanitized.replace(rd, "://old.reddit.com")

    if alert:
        # Mention-alert mode: replace each distinct @mention (first
        # occurrence only) with a link to the resolved user.
        # NOTE(review): the replacement f-string presumably contained an
        # <a>/<img> tag that was stripped; only "\n\n@username" remains.
        captured = []
        for i in mention_regex2.finditer(sanitized):
            if i.group(0) in captured: continue
            captured.append(i.group(0))
            u = get_user(i.group(1), graceful=True)
            if u:
                sanitized = sanitized.replace(i.group(0), f'''

@{u.username}''', 1)
    else:
        # Expand r/sub and u/user shorthand into links, then resolve
        # @mentions — skipping users who block (or are blocked by) the
        # viewer unless the viewer is an admin.
        sanitized = reddit_regex.sub(r'\1/\2', sanitized)
        sanitized = sub_regex.sub(r'\1/\2', sanitized)
        captured = []
        for i in mention_regex.finditer(sanitized):
            if i.group(0) in captured: continue
            captured.append(i.group(0))
            u = get_user(i.group(2), graceful=True)
            if u and (not g.v.any_block_exists(u) or g.v.admin_level > 1):
                if noimages:
                    sanitized = sanitized.replace(i.group(0), f'{i.group(1)}@{u.username}', 1)
                else:
                    # NOTE(review): likely lost an <img> avatar tag here too.
                    sanitized = sanitized.replace(i.group(0), f'''{i.group(1)}@{u.username}''', 1)

    # Request the full-fidelity webp variant from imgur.
    sanitized = imgur_regex.sub(r'\1_d.webp?maxwidth=9999&fidelity=high', sanitized)

    # Whitelist-based HTML sanitization; LinkifyFilter auto-links bare URLs
    # except inside <pre>.
    if noimages:
        sanitized = bleach.Cleaner(tags=no_images,
                                   attributes=allowed_attributes,
                                   protocols=allowed_protocols,
                                   styles=allowed_styles,
                                   filters=[partial(LinkifyFilter, skip_tags=["pre"], parse_email=False, )]
                                   ).clean(sanitized)
    else:
        # NOTE(review): this branch uses inline protocol/style lists instead
        # of the module-level constants — possibly intentional, but worth
        # unifying upstream.
        sanitized = bleach.Cleaner(tags=allowed_tags,
                                   attributes=allowed_attributes,
                                   protocols=['http', 'https'],
                                   styles=['color','font-weight','transform','-webkit-transform'],
                                   filters=[partial(LinkifyFilter, skip_tags=["pre"], parse_email=False, )]
                                   ).clean(sanitized)

    soup = BeautifulSoup(sanitized, 'lxml')

    # Rewrite every surviving <img> (except the 'pp20' avatar class) to
    # lazy-load via a placeholder and open in the expand-image modal.
    for tag in soup.find_all("img"):
        if tag.get("src") and tag.get("class") != ['pp20']:
            tag["class"] = "in-comment-image"
            tag["loading"] = "lazy"
            tag["data-src"] = tag["src"]
            tag["src"] = "/static/assets/images/loading.webp"
            tag['alt'] = f'![]({tag["data-src"]})'
            tag["onclick"] = "expandDesktopImage(this.src);"
            tag["data-bs-toggle"] = "modal"
            tag["data-bs-target"] = "#expandImageModal"
            tag['referrerpolicy'] = "no-referrer"

    # Harden outbound links and strip tracking query parameters.
    for tag in soup.find_all("a"):
        if tag.get("href"):
            # External links open in a new tab without referrer/opener.
            if not tag["href"].startswith(SITE_FULL) and not tag["href"].startswith('/') and not tag["href"].startswith(SITE_FULL2):
                tag["target"] = "_blank"
                tag["rel"] = "nofollow noopener noreferrer"
            # If the visible text looks like a URL, force it to match the
            # real destination so links can't be disguised.
            if fishylinks_regex.fullmatch(str(tag.string)):
                # NOTE(review): bare except — tag.string assignment can fail
                # on non-navigable content; narrow this if possible.
                try: tag.string = tag["href"]
                except: tag.string = ""
            parsed_url = urlparse(tag.get("href"))
            domain = parsed_url.netloc
            if domain == 'old.reddit.com':
                # NOTE(review): query=None drops the ENTIRE query string for
                # old.reddit.com links (e.g. ?context=...) — confirm this is
                # intended rather than a bug.
                new_url = ParseResult(scheme="https", netloc=parsed_url.netloc, path=parsed_url.path, params=parsed_url.params, query=None, fragment=parsed_url.fragment)
            else:
                # Drop utm_* / ref_* tracking parameters, keep the rest.
                # parse_qs/urlencode come from the star-import (.get).
                qd = parse_qs(parsed_url.query)
                filtered = {k: val for k, val in qd.items() if not k.startswith('utm_') and not k.startswith('ref_')}
                new_url = ParseResult(scheme="https", netloc=parsed_url.netloc, path=parsed_url.path, params=parsed_url.params, query=urlencode(filtered, doseq=True), fragment=parsed_url.fragment)
            new_url = urlunparse(new_url)
            if tag.string == tag["href"]: tag.string = new_url
            tag["href"] = new_url

    sanitized = str(soup)

    sanitized = spoiler_regex.sub(r'\1', sanitized)

    if comment:
        # Names of Marsey emojis actually rendered, for usage counting below.
        marseys_used = set()

    # First emoji pass: emoji codes wrapped in surrounding markup
    # (emoji_regex), with size/mirroring modifiers (#, !) parsed by the
    # inner emoji_regex2 scan.
    emojis = list(emoji_regex.finditer(sanitized))
    if len(emojis) > 20: edit = True

    captured = []
    for i in emojis:
        if i.group(0) in captured: continue
        captured.append(i.group(0))

        old = i.group(0)
        # The "long" marseys need mb-0 so they don't add bottom margin.
        if 'marseylong1' in old or 'marseylong2' in old or 'marseyllama1' in old or 'marseyllama2' in old:
            new = old.lower().replace(">", " class='mb-0'>")
        else:
            new = old.lower()

        captured2 = []
        for i in emoji_regex2.finditer(new):
            if i.group(0) in captured2: continue
            captured2.append(i.group(0))

            emoji = i.group(1).lower()
            # Modifier prefixes: '#' = large, '!' = mirrored.
            if emoji.startswith("#!") or emoji.startswith("!#"):
                classes = 'emoji-lg mirrored'
                remoji = emoji[2:]
            elif emoji.startswith("#"):
                classes = 'emoji-lg'
                remoji = emoji[1:]
            elif emoji.startswith("!"):
                classes = 'emoji-md mirrored'
                remoji = emoji[1:]
            else:
                classes = 'emoji-md'
                remoji = emoji

            # Rare random 'golden' styling (disabled on edits so the roll
            # can't be rerolled by editing).
            if not edit and random() < 0.0025 and ('marsey' in emoji or emoji in marseys_const2):
                classes += ' golden'

            if remoji == 'marseyrandom': remoji = choice(marseys_const2)

            if path.isfile(f'files/assets/images/emojis/{remoji}.webp'):
                # NOTE(review): both f-string arguments were truncated here —
                # the pattern ends at "(?" (an invalid regex) and the <img>
                # replacement is gone entirely. Restore from upstream.
                new = re.sub(f'(?', new, flags=re.I|re.A)
                if comment: marseys_used.add(emoji)

        sanitized = sanitized.replace(old, new)

    # Second emoji pass: bare :emoji: codes (emoji_regex3).
    emojis = list(emoji_regex3.finditer(sanitized))
    if len(emojis) > 20: edit = True

    captured = []
    for i in emojis:
        if i.group(0) in captured: continue
        captured.append(i.group(0))

        emoji = i.group(1).lower().replace('#','')
        if emoji.startswith("!"):
            emoji = emoji[1:]
            classes = 'emoji mirrored'
            if not edit and random() < 0.0025 and ('marsey' in emoji or emoji in marseys_const2):
                classes += ' golden'
            old = emoji
            if emoji == 'marseyrandom': emoji = choice(marseys_const2)
            else: emoji = old
            if path.isfile(f'files/assets/images/emojis/{emoji}.webp'):
                # NOTE(review): truncated literals, as above.
                sanitized = re.sub(f'(?', sanitized, flags=re.I|re.A)
                if comment: marseys_used.add(emoji)
        else:
            classes = 'emoji'
            if not edit and random() < 0.0025 and ('marsey' in emoji or emoji in marseys_const2):
                classes += ' golden'
            old = emoji
            if emoji == 'marseyrandom': emoji = choice(marseys_const2)
            else: emoji = old
            if path.isfile(f'files/assets/images/emojis/{emoji}.webp'):
                # NOTE(review): truncated literals, as above.
                sanitized = re.sub(f'(?', sanitized, flags=re.I|re.A)
                if comment: marseys_used.add(emoji)

    # Replace recognized YouTube links with an embedded player, carrying
    # over any t=/start= time offset.
    captured = []
    for i in youtube_regex.finditer(sanitized):
        if i.group(0) in captured: continue
        captured.append(i.group(0))

        url = i.group(1)
        yt_id = i.group(2).split('&')[0].split('%')[0]
        # NOTE(review): `replacing` was presumably an <a> tag around {url},
        # and `htmlsource` the <iframe> embed using {yt_id} and {t} — both
        # literals were stripped. replace('&','&') was likely
        # replace('&amp;','&').
        replacing = f'{url}'
        params = parse_qs(urlparse(url.replace('&','&')).query)
        t = params.get('t', params.get('start', [0]))[0]
        if isinstance(t, str): t = t.replace('s','')
        htmlsource = f''
        sanitized = sanitized.replace(replacing, htmlsource)

    sanitized = unlinked_regex.sub(r'\1\2', sanitized)

    if not noimages:
        # NOTE(review): the replacement template (presumably a <video> tag
        # referencing \1) was stripped; only its surrounding newlines remain.
        sanitized = video_regex.sub(r'''

''', sanitized)

    if comment:
        # Persist usage counts for every Marsey rendered in this comment.
        for marsey in g.db.query(Marsey).filter(Marsey.name.in_(marseys_used)).all():
            marsey.count += 1
            g.db.add(marsey)

    # Cancel the watchdog alarm before returning.
    signal.alarm(0)
    return sanitized


def handler2(signum, frame):
    # SIGALRM handler for filter_emojis_only(); identical to handler().
    print("Forever is over!")
    raise Exception("end of time")


def filter_emojis_only(title, edit=False, graceful=False):
    """Sanitize a title: strip all HTML, render only :emoji: codes.

    title    -- raw title text (untrusted input).
    edit     -- suppresses the random 'golden' emoji roll; forced on when
                more than 20 emoji are present.
    graceful -- when False, abort(400) if the result exceeds 1500 chars;
                when True, fall through (returning None in that case).
    Wrapped in a 1-second SIGALRM timeout like sanitize().
    """
    signal.signal(signal.SIGALRM, handler2)
    signal.alarm(1)

    # Strip zero-width junk and control whitespace.
    # NOTE(review): several of these pairs look de-escaped by extraction —
    # replace("&","&") etc. were presumably HTML-entity escapes such as
    # replace("&","&amp;"). Verify against upstream before trusting them.
    title = title.replace("","").replace("&", "&").replace('<','<').replace('>','>').replace('"', '"').replace("'", "'").replace("\ufeff", "").replace("𒐪","").replace('‎','').replace("\n", "").replace("\r", "").replace("\t", "").strip()

    # Remove every HTML tag.
    title = bleach.clean(title, tags=[])

    emojis = list(emoji_regex4.finditer(title))
    if len(emojis) > 20: edit = True

    captured = []
    for i in emojis:
        if i.group(0) in captured: continue
        captured.append(i.group(0))

        emoji = i.group(1).lower()
        if emoji.startswith("!"):
            emoji = emoji[1:]
            classes = 'emoji mirrored'
            if not edit and random() < 0.0025 and ('marsey' in emoji or emoji in marseys_const2):
                classes += ' golden'
            old = emoji
            if emoji == 'marseyrandom': emoji = choice(marseys_const2)
            else: emoji = old
            if path.isfile(f'files/assets/images/emojis/{emoji}.webp'):
                # NOTE(review): truncated regex/replacement literals, as in
                # sanitize() — restore from upstream.
                title = re.sub(f'(?', title, flags=re.I|re.A)
        else:
            classes = 'emoji'
            if not edit and random() < 0.0025 and ('marsey' in emoji or emoji in marseys_const2):
                classes += ' golden'
            old = emoji
            if emoji == 'marseyrandom': emoji = choice(marseys_const2)
            else: emoji = old
            if path.isfile(f'files/assets/images/emojis/{emoji}.webp'):
                # NOTE(review): truncated literals, as above.
                title = re.sub(f'(?', title, flags=re.I|re.A)

    title = strikethrough_regex.sub(r'\1', title)

    signal.alarm(0)

    # NOTE(review): when graceful=True and the title is too long, this falls
    # through and implicitly returns None — callers must handle that.
    if len(title) > 1500 and not graceful: abort(400)
    else: return title