import bleach
from bs4 import BeautifulSoup
from bleach.linkifier import LinkifyFilter
from functools import partial
from .get import *
from .patter import pat
from os import path, environ
import re
from mistletoe import markdown
from json import loads, dump
from random import random, choice
import signal
import time
import requests
# sanitize: turn user-supplied markdown text into HTML and pass it through a
# bleach allow-list before returning it.
#
# Parameters (as used in the visible body):
#   sanitized -- the input text; it is reassigned at every step and returned.
#   noimages  -- when falsy, 'img', 'video', 'lite-youtube' and 'source' are
#                added to the allowed tags; it also selects which replacement
#                is used for user mentions.
#   alert     -- when truthy, an extra pass over mention_regex2 matches runs
#                before the regular mention pass.
#   comment   -- when truthy, a Marsey query is iterated, each row's `count`
#                is incremented and the row is added to g.db.
#   edit      -- not referenced anywhere in the visible body of this function.
#
# Returns: the string produced by bleach.Cleaner(...).clean(sanitized).
# Raises:  Exception("Timeout") from the SIGALRM handler, if SIGALRM arrives.
#
# Visible steps, in order:
#   1. Install the SIGALRM handler.
#   2. image_check_regex substitution, mistletoe markdown(), strikethrough_regex
#      substitution, then removal of a few literal characters (e.g. "\ufeff").
#   3. Mention rewriting via get_user(..., graceful=True); the regular pass
#      also tests `not g.v.any_block_exists(u) or g.v.admin_level > 1`.
#   4. reddit_regex / sub_regex / imgur_regex substitutions.
#   5. BeautifulSoup ('lxml') pass over <img> (lazy-loading attributes, src
#      moved to data-src, placeholder src, alt, referrerpolicy) and <a>
#      (rel removed; target/rel set for hrefs not starting with SITE_FULL,
#      SITE_FULL2 or '/'; link text matching fishylinks_regex replaced by the
#      href, or "" if that fails).
#   6. spoiler_regex substitution, '#fortune' replaced by a random
#      FORTUNE_REPLIES entry, utm_regex / utm_regex2 matches removed.
#   7. bleach.Cleaner with the tag list, the allowed_attributes callback,
#      http/https protocols, four CSS properties and a LinkifyFilter that
#      skips <pre> and does not parse e-mail addresses.
#   8. signal.alarm(0) cancels any pending alarm, then the result is returned.
#
# NOTE(review): the text of this function is damaged. Indentation is missing,
# so which statements sit under which `if`/`for` cannot be confirmed from here;
# several calls have lost an operand (`if in captured`, `get_user(, ...)`,
# `sanitized.replace(, ...)`), some string literals are cut short, and
# everything from the spoiler substitution onward is collapsed onto one
# physical line together with the start of filter_emojis_only. Restore the
# block from version control before changing any code.
# NOTE(review): allowed_attributes accepts the img attribute name
# 'referrpolicy', but the attribute set on <img> tags earlier in this function
# is 'referrerpolicy'. As written the names do not match, so the attribute
# presumably gets stripped by bleach -- confirm and fix the spelling.
# NOTE(review): the handler is installed and signal.alarm(0) is called at the
# end, but no signal.alarm(<seconds>) call is visible in this function, so the
# timeout is never armed here (filter_emojis_only does call signal.alarm(1)).
# NOTE(review): `.replace('&','&')` replaces a string with itself; presumably
# an HTML entity was decoded when this text was extracted -- verify.
# NOTE(review): the bare `except:` around `tag.string = tag["href"]` hides
# every error type; narrow it once the block is restored.
def sanitize(sanitized, noimages=False, alert=False, comment=False, edit=False):
# SIGALRM handler: converts the alarm signal into an exception.
def handler(signum, frame):
raise Exception("Timeout")
# Registers the handler only; no alarm is armed at this point.
signal.signal(signal.SIGALRM, handler)
sanitized = image_check_regex.sub(r'\1', sanitized)
sanitized = markdown(sanitized)
sanitized = strikethrough_regex.sub(r'\1', sanitized)
sanitized = sanitized.replace('','').replace('','').replace("\ufeff", "").replace("𒐪","")
# Extra mention pass, run only when `alert` is truthy.
if alert:
captured = []
for i in mention_regex2.finditer(sanitized):
if in captured: continue
u = get_user(, graceful=True)
if u:
sanitized = sanitized.replace(, f'''
sanitized = reddit_regex.sub(r'\1/\2', sanitized)
sanitized = sub_regex.sub(r'\1/\2', sanitized)
captured = []
for i in mention_regex.finditer(sanitized):
if in captured: continue
u = get_user(, graceful=True)
if u and (not g.v.any_block_exists(u) or g.v.admin_level > 1):
if noimages:
sanitized = sanitized.replace(, f'{}@{u.username}', 1)
sanitized = sanitized.replace(, f'''{}@{u.username}''')
sanitized = imgur_regex.sub(r'\1_d.webp?maxwidth=9999&fidelity=high', sanitized)
soup = BeautifulSoup(sanitized, 'lxml')
for tag in soup.find_all("img"):
if tag.get("src") and not tag["src"].startswith('/pp/'):
tag["loading"] = "lazy"
tag["data-src"] = tag["src"]
tag["src"] = "/assets/images/loading.webp"
tag['alt'] = f'![]({tag["data-src"]})'
tag['referrerpolicy'] = "no-referrer"
for tag in soup.find_all("a"):
del tag["rel"]
if tag.get("href"):
if not tag["href"].startswith(SITE_FULL) and not tag["href"].startswith('/') and not tag["href"].startswith(SITE_FULL2):
tag["target"] = "_blank"
tag["rel"] = "nofollow noopener noreferrer"
if fishylinks_regex.fullmatch(str(tag.string)):
try: tag.string = tag["href"]
except: tag.string = ""
sanitized = str(soup)
sanitized = spoiler_regex.sub(r'
', sanitized) if comment: for marsey in g.db.query(Marsey).filter( marsey.count += 1 g.db.add(marsey) if '#fortune' in sanitized: sanitized = sanitized.replace('#fortune', '') sanitized += '\n\n
' + choice(FORTUNE_REPLIES) + '
' sanitized = sanitized.replace('&','&') sanitized = utm_regex.sub('', sanitized) sanitized = utm_regex2.sub('', sanitized) sanitized = sanitized.replace('','').replace('','') allowed_tags = ['b','blockquote','br','code','del','em','h1','h2','h3','h4','h5','h6','hr','i','li','ol','p','pre','strong','sub','sup','table','tbody','th','thead','td','tr','ul','marquee','a','span','ruby','rp','rt','spoiler'] if not noimages: allowed_tags += ['img','video','lite-youtube','source'] def allowed_attributes(tag, name, value): if name == 'style': return True if tag == 'marquee': if name in ['direction', 'behavior', 'scrollamount']: return True if name in {'height', 'width'}: try: value = int(value.replace('px', '')) except: return False if 0 < value <= 250: return True return False if tag == 'a': if name == 'href': return True if name == 'rel' and value == 'nofollow noopener noreferrer': return True if name == 'target' and value == '_blank': return True return False if tag == 'img': if name == 'loading' and value == 'lazy': return True if name == 'referrpolicy' and value == 'no-referrer': return True if name == 'data-bs-toggle' and value == 'tooltip': return True if name in ['src','data-src','alt','title','g','b']: return True return False if tag == 'lite-youtube': if name == 'params' and value.startswith('autoplay=1&modestbranding=1'): return True if name == 'videoid': return True return False if tag == 'video': if name == 'controls' and value == '': return True if name == 'preload' and value == 'none': return True return False if tag == 'source': if name == 'src': return True return False sanitized = bleach.Cleaner(tags=allowed_tags, attributes=allowed_attributes, protocols=['http', 'https'], styles=['color', 'background-color', 'font-weight', 'text-align'], filters=[partial(LinkifyFilter,skip_tags=["pre"],parse_email=False)] ).clean(sanitized) signal.alarm(0) return sanitized def filter_emojis_only(title, edit=False, graceful=False): def handler(signum, frame): 
print("Timeout!") raise Exception("Timeout") signal.signal(signal.SIGALRM, handler) signal.alarm(1) title = title.replace('','').replace('','').replace("\ufeff", "").replace("𒐪","").replace("\n", "").replace("\r", "").replace("\t", "").replace("&", "&").replace('<','<').replace('>','>').replace('"', '"').replace("'", "'").strip() emojis = list(emoji_regex4.finditer(title)) if len(emojis) > 20: edit = True captured = [] for i in emojis: if in captured: continue captured.append( emoji = golden = ' ' if not edit and random() < 0.0025 and ('marsey' in emoji or emoji in marseys_const2): golden = 'g ' old = emoji emoji = emoji.replace('!','').replace('#','') if emoji == 'marseyrandom': emoji = choice(marseys_const2) if path.isfile(f'files/assets/images/emojis/{emoji}.webp'): title = re.sub(f'(?', title, flags=re.I|re.A) elif emoji.endswith('pat') and path.isfile(f"files/assets/images/emojis/{emoji.replace('pat','')}.webp"): pat(emoji.replace('pat','')) title = re.sub(f'(?', title, flags=re.I|re.A)'{CF_ZONE}/purge_cache', headers=CF_HEADERS, data={'files': [f"https://{}/e/{emoji}.webp"]}, timeout=5) title = strikethrough_regex.sub(r'