2022-07-05 22:11:45 +00:00
import functools
2022-11-15 09:19:08 +00:00
import random
import re
import signal
from functools import partial
2023-01-01 07:55:22 +00:00
from os import path , listdir
2023-08-05 19:01:42 +00:00
from urllib . parse import parse_qs , urlparse , unquote , ParseResult , urlencode , urlunparse
2023-09-05 18:24:10 +00:00
import time
2023-10-06 19:22:11 +00:00
import requests
2022-11-15 09:19:08 +00:00
2023-07-26 23:41:52 +00:00
from sqlalchemy . sql import func
2022-05-04 23:09:46 +00:00
import bleach
2022-05-25 00:27:41 +00:00
from bleach . css_sanitizer import CSSSanitizer
2022-07-15 13:27:45 +00:00
from bleach . linkifier import LinkifyFilter
2022-11-15 09:19:08 +00:00
from bs4 import BeautifulSoup
2022-05-04 23:09:46 +00:00
from mistletoe import markdown
2023-02-25 22:06:49 +00:00
2022-11-15 09:19:08 +00:00
from files . classes . domains import BannedDomain
2023-02-07 03:31:49 +00:00
from files . classes . mod_logs import ModAction
from files . classes . notifications import Notification
2023-02-25 22:06:49 +00:00
from files . classes . group import Group
2023-08-11 13:34:56 +00:00
from files . classes . follows import Follow
2022-11-15 09:19:08 +00:00
2022-12-11 23:44:34 +00:00
from files . helpers . config . const import *
2022-11-15 09:19:08 +00:00
from files . helpers . const_stateful import *
from files . helpers . regex import *
2023-03-09 22:32:31 +00:00
from files . helpers . get import *
2023-10-07 15:35:16 +00:00
from files . helpers . marsify import *
from files . helpers . owoify import *
from files . helpers . sharpen import *
from files . helpers . queenify import *
2022-05-04 23:09:46 +00:00
2023-10-28 19:21:24 +00:00
# HTML tag whitelist handed to bleach.Cleaner(tags=...) in sanitize().
allowed_tags = (
    'a', 'audio',
    'b', 'big', 'blink', 'blockquote', 'br',
    'center', 'code',
    'del', 'details',
    'em',
    'g',
    'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'hr',
    'i', 'img',
    'li', 'lite-youtube',
    'marquee',
    'ol',
    'p', 'pre',
    'rp', 'rt', 'ruby',
    'small', 'span', 'spoiler', 'strike', 'strong', 'sub', 'summary', 'sup',
    'table', 'tbody', 'td', 'th', 'thead', 'tr',
    'u', 'ul',
    'video',
)
2022-05-04 23:09:46 +00:00
2023-09-18 16:28:08 +00:00
# CSS properties users may set in style="" attributes. Kept as a list because
# sanitize() copies it and removes entries per user before building CSSSanitizer.
allowed_styles = [
    'background-color',
    'color',
    'filter',
    'font-weight',
    'text-align',
    'transform',
]
2022-05-25 00:27:41 +00:00
2022-05-04 23:09:46 +00:00
def allowed_attributes(tag, name, value):
    """Decide whether attribute ``name`` with ``value`` may stay on ``tag``.

    Passed as the ``attributes`` callable to ``bleach.Cleaner`` in
    ``sanitize()``.

    :param tag: lower-case element name, e.g. ``'img'``.
    :param name: attribute name.
    :param value: attribute value as a string.
    :returns: ``True`` to keep the attribute, ``False`` to drop it. For
        ``src``/``data-src`` the result of ``is_safe_url(value)`` is returned.
    """
    # style is always kept here; sanitize() also passes a CSSSanitizer to the
    # same Cleaner to restrict which properties are allowed.
    if name == 'style':
        return True

    if tag == 'marquee':
        if name in {'direction', 'behavior', 'scrollamount'}:
            return True
        if name in {'height', 'width'}:
            # Only the parse failure is swallowed. A bare except here would
            # also eat the timeout exception raised by the SIGALRM handler.
            try:
                size = int(value.replace('px', ''))
            except (AttributeError, TypeError, ValueError):
                return False
            if 0 < size <= 250:
                return True

    if tag == 'a':
        # Reject backslashes (browser host-injection trick, see sanitize())
        # and punycode ("xn--") hostnames.
        if name == 'href' and '\\' not in value and 'xn--' not in value:
            return True
        if name == 'rel' and value == 'nofollow noopener':
            return True
        if name == 'target' and value == '_blank':
            return True

    if tag == 'img':
        if name in {'src', 'data-src'}:
            return is_safe_url(value)
        if name == 'loading' and value == 'lazy':
            return True
        if name == 'data-bs-toggle' and value == 'tooltip':
            return True
        # valueless marker attributes emitted by render_emoji()
        if name in {'g', 'b', 'glow', 'party'} and not value:
            return True
        if name in {'alt', 'title'}:
            return True
        if name == 'class' and value == 'img':
            return True
        if name == 'data-user-submitted' and not value:
            return True

    if tag == 'lite-youtube':
        if name == 'params' and value.startswith('autoplay=1&modestbranding=1'):
            return True
        if name == 'videoid':
            return True

    if tag == 'video':
        if name == 'controls' and value == '':
            return True
        if name == 'preload' and value == 'none':
            return True
        if name == 'src':
            return is_safe_url(value)

    if tag == 'audio':
        if name == 'src':
            return is_safe_url(value)
        if name == 'controls' and value == '':
            return True
        if name == 'preload' and value == 'none':
            return True

    if tag == 'p':
        if name == 'class' and value in {'mb-0', 'resizable', 'yt', 'text-center'}:
            return True

    if tag == 'span':
        if name == 'data-bs-toggle' and value == 'tooltip':
            return True
        if name == 'title':
            return True
        if name == 'alt':
            return True
        if name == 'cide' and not value:
            return True
        if name == 'bounce' and not value:
            return True

    if tag == 'table':
        if name == 'class' and value == 'table':
            return True

    return False
2022-05-04 23:09:46 +00:00
2023-05-03 14:12:12 +00:00
def create_comment_duplicated(text_html):
    """Create a bot comment authored by AUTOJANNY_ID with ``text_html`` as its body.

    The comment has no parent post, is added to ``g.db`` and flushed, and is
    set to be its own top comment.

    :param text_html: already-sanitized HTML body.
    :returns: the id of the new comment.
    """
    comment = Comment(
        author_id=AUTOJANNY_ID,
        parent_post=None,
        body_html=text_html,
        distinguish_level=6,
        is_bot=True,
    )

    session = g.db
    session.add(comment)
    # flush before reading comment.id (presumably so the id is populated)
    session.flush()

    comment.top_comment_id = comment.id
    return comment.id
def send_repeatable_notification_duplicated(uid, text):
    """Notify user ``uid`` with ``text``, reusing an identical bot comment if possible.

    Existing AUTOJANNY bot comments with the same sanitized body are checked in
    id order. The first one the user has no notification for is reused. If
    every such comment is already linked to the user (or none exists), a new
    comment is created. Bot accounts (``BOT_IDs``) are never notified.

    :param uid: id of the user to notify.
    :param text: raw notification text; it is run through ``sanitize``.
    """
    if uid in BOT_IDs:
        return

    text_html = sanitize(text)

    candidates = g.db.query(Comment.id).filter_by(
        author_id=AUTOJANNY_ID,
        parent_post=None,
        body_html=text_html,
        is_bot=True,
    ).order_by(Comment.id).all()

    def already_notified(comment_id):
        return g.db.query(Notification.user_id).filter_by(
            user_id=uid,
            comment_id=comment_id,
        ).one_or_none()

    # Lazy scan: stops querying at the first comment the user has not received.
    comment_id = next(
        (c.id for c in candidates if not already_notified(c.id)),
        None,
    )
    if comment_id is None:
        comment_id = create_comment_duplicated(text_html)

    g.db.add(Notification(comment_id=comment_id, user_id=uid))
2023-05-14 20:48:38 +00:00
def execute_blackjack(v, target, body, kind):
    """Shadowban ``v`` if ``body`` matches a phrase from the global ``blackjack`` list.

    ``blackjack`` is a comma-separated string of phrases. A phrase matches when
    every whitespace-separated word in it occurs as a substring of the
    lower-cased body. Empty phrases (trailing comma, ``"a,,b"``) are ignored;
    without that, ``all()`` over zero words would match every body.

    On a match the user is shadowbanned by AUTOJANNY, a ``ModAction`` is
    recorded, and users with the ``BLACKJACK_NOTIFICATIONS`` permission are
    notified.

    :param v: the acting user.
    :param target: the post or comment involved, or a falsy value.
    :param body: the text to scan.
    :param kind: ``'post'``, ``'report'``, ``'comment'``, ``'message'`` or any
        other label; used to build the notification.
    :returns: ``True`` if the blackjack fired, ``False`` otherwise.
    """
    if not blackjack or not body:
        return False

    body_lower = body.lower()
    phrases = (phrase.split() for phrase in blackjack.split(','))
    triggered = any(
        words and all(word in body_lower for word in words)
        for words in phrases
    )
    if not triggered:
        return False

    v.shadowbanned = AUTOJANNY_ID

    ma = ModAction(
        kind="shadowban",
        user_id=AUTOJANNY_ID,
        target_user_id=v.id,
        _note='reason: "Blackjack"'
    )
    g.db.add(ma)

    v.ban_reason = "Blackjack"
    g.db.add(v)

    notified_ids = [
        row[0]
        for row in g.db.query(User.id).filter(
            User.admin_level >= PERMS['BLACKJACK_NOTIFICATIONS']
        )
    ]

    extra_info = kind

    if target:
        if kind == 'post':
            extra_info = target.permalink
        elif kind == 'report':
            extra_info = f"reports on {target.permalink}"
        elif kind in {'comment', 'message'}:
            # Link staff directly to the offending comment; no text
            # notification is sent in this case.
            for user_id in notified_ids:
                g.db.add(Notification(comment_id=target.id, user_id=user_id))
            extra_info = None

    if extra_info:
        for user_id in notified_ids:
            send_repeatable_notification_duplicated(
                user_id, f"Blackjack by @{v.username}: {extra_info}"
            )

    return True
2022-05-04 23:09:46 +00:00
2023-10-06 18:56:56 +00:00
def find_all_emoji_endings(emoji):
    """Strip trailing modifier suffixes from an emoji name.

    The suffixes ``pat``, ``talking``, ``genocide`` and ``love`` are peeled off
    the end of ``emoji`` one at a time. Each may be removed at most once: a
    repeated suffix is left in place and ends the scan.

    After stripping, a name ending in ``random`` whose prefix (title-cased,
    with a few spelling fix-ups) is in ``EMOJI_KINDS`` is replaced by a random
    non-NSFW emoji of that kind. If the database has no such emoji the name is
    left unchanged.

    :param emoji: lower-case emoji name without the surrounding colons.
    :returns: ``(endings, emoji)`` where ``endings`` lists the removed suffixes
        in removal order (outermost first) and ``emoji`` is the remaining name.
    """
    # None of these is a suffix of another, so at most one can match at a time.
    suffixes = ('pat', 'talking', 'genocide', 'love')
    endings = []

    while True:
        suffix = next((s for s in suffixes if emoji.endswith(s)), None)
        if suffix is None or suffix in endings:
            break
        endings.append(suffix)
        emoji = emoji[:-len(suffix)]

    if emoji.endswith('random'):
        kind = emoji.split('random')[0].title()
        kind = {
            'Donkeykong': 'Donkey Kong',
            'Marseyflag': 'Marsey Flags',
            'Marseyalphabet': 'Marsey Alphabet',
        }.get(kind, kind)

        if kind in EMOJI_KINDS:
            row = (
                g.db.query(Emoji.name)
                .filter_by(kind=kind, nsfw=False)
                .order_by(func.random())
                .first()
            )
            # first() returns None when the kind has no eligible emoji
            if row:
                emoji = row[0]

    return endings, emoji
2023-08-09 08:33:14 +00:00
2023-07-22 10:10:27 +00:00
def render_emoji(html, regexp, golden, emojis_used, b=False, is_title=False):
    """Replace emoji codes matched by ``regexp`` in ``html`` with image markup.

    :param html: text to process.
    :param regexp: compiled pattern; group 0 is the full emoji code and group 1
        the emoji name.
    :param golden: when true, and there are at most 20 matches, marsey emojis
        get a 0.5% chance of an extra ``g``/``glow``/``party`` attribute.
    :param emojis_used: set that receives the base name of every emoji
        rendered (mutated in place).
    :param b: add the valueless ``b`` attribute to the emitted ``<img>``.
    :param is_title: strip ``#`` from the name before it is used for alt/title.
    :returns: ``html`` with every resolvable emoji code replaced. Codes that
        resolve to neither an emoji file nor (for ``@name`` with a modifier) a
        user are left as they are.
    """
    # Attribute strings such as attrs carry their own leading space, so the
    # final placeholder sits directly after the preceding attribute.
    emoji_partial_pat = '<img alt=":{0}:" loading="lazy" src="{1}"{2}>'
    emoji_partial = '<img alt=":{0}:" data-bs-toggle="tooltip" loading="lazy" src="{1}" title=":{0}:"{2}>'

    emojis = list(regexp.finditer(html))
    captured = set()

    for i in emojis:
        code = i.group(0)
        if code in captured:
            continue
        captured.add(code)

        emoji = i.group(1).lower()
        attrs = ''
        if b:
            attrs += ' b'
        if is_title:
            emoji = emoji.replace('#', '')
        if golden and len(emojis) <= 20 and ('marsey' in emoji or emoji in MARSEYS_CONST2):
            if random.random() < 0.005:
                attrs += ' ' + random.choice(('g', 'glow', 'party'))

        # `old` keeps any '!' / '#' flags for alt/title; the lookup name drops them.
        old = emoji
        emoji = emoji.replace('!', '').replace('#', '')

        emoji_html = None
        ending_modifiers, emoji = find_all_emoji_endings(emoji)

        is_talking = 'talking' in ending_modifiers
        is_patted = 'pat' in ending_modifiers
        is_loved = 'love' in ending_modifiers
        is_genocided = 'genocide' in ending_modifiers
        is_user = emoji.startswith('@')
        is_talking_first = (
            is_talking and is_patted
            and ending_modifiers.index('pat') > ending_modifiers.index('talking')
        )

        show_hand = is_patted and emoji != 'marseyunpettable'
        hand_html = f'<img loading="lazy" src="{SITE_FULL_IMAGES}/i/hand.webp">' if show_hand else ''
        talking_html = f'<img loading="lazy" src="{SITE_FULL_IMAGES}/i/talking.webp">' if is_talking else ''
        genocide_attr = ' cide' if is_genocided else ''

        # An absent overlay is '', so plain concatenation covers the
        # pat-only, talking-only and neither cases too.
        if is_talking_first:
            modifier_html = f'{talking_html}{hand_html}'
        else:
            modifier_html = f'{hand_html}{talking_html}'

        if is_loved:
            modifier_html += (
                f'<img loading="lazy" src="{SITE_FULL_IMAGES}/i/love-foreground.webp" alt=":{old}:" {attrs}>'
                f'<img loading="lazy" alt=":{old}:" src="{SITE_FULL_IMAGES}/i/love-background.webp" {attrs}>'
            )

        if show_hand or is_talking or is_genocided or is_loved:
            span_open = f'<span alt=":{old}:" data-bs-toggle="tooltip" title=":{old}:"{genocide_attr}>'
            if path.isfile(f"files/assets/images/emojis/{emoji}.webp"):
                img = emoji_partial_pat.format(old, f"{SITE_FULL_IMAGES}/e/{emoji}.webp", attrs)
                emoji_html = f'{span_open}{modifier_html}{img}</span>'
            elif is_user:
                if u := get_user(emoji[1:], graceful=True):
                    img = emoji_partial_pat.format(old, f"/pp/{u.id}", attrs)
                    emoji_html = f'{span_open}{modifier_html}{img}</span>'
        elif path.isfile(f'files/assets/images/emojis/{emoji}.webp'):
            emoji_html = emoji_partial.format(old, f'{SITE_FULL_IMAGES}/e/{emoji}.webp', attrs)

        if emoji_html:
            emojis_used.add(emoji)
            # Escape the matched code so it is searched for literally, and use
            # a callable so the replacement is inserted verbatim rather than
            # parsed as a template. Skips codes preceded by a double quote or
            # inside <code>/<pre>.
            pattern = f'(?<!"){re.escape(code)}(?![^<]*</(code|pre)>)'
            html = re.sub(pattern, lambda _match, _new=emoji_html: _new, html)

    return html
2023-07-30 00:42:06 +00:00
def with_sigalrm_timeout(timeout):
    """Decorator factory: abort the wrapped call with an exception after ``timeout`` seconds.

    Uses ``SIGALRM``. The prior SIGALRM state is restored when the call ends:
    the previous handler is reinstalled and any alarm that was pending when the
    call started is re-armed. Without this, a nested decorated call would
    cancel the outer call's timeout on its way out.

    :param timeout: whole seconds before ``Exception("Timeout")`` is raised.
    :returns: a decorator preserving the wrapped function's metadata.
    """

    # while trying to test this using time.sleep I discovered that gunicorn does in fact do some
    # async so if we timeout on that (or on a db op) then the process is crashed without returning
    # a proper 500 error. Oh well.
    def sig_handler(signum, frame):
        print("Timeout!", flush=True)
        raise Exception("Timeout")

    def inner(func):
        @functools.wraps(func)
        def wrapped(*args, **kwargs):
            previous_handler = signal.signal(signal.SIGALRM, sig_handler)
            # alarm() returns the seconds left on any alarm it replaces (0 if none)
            previous_remaining = signal.alarm(timeout)
            try:
                return func(*args, **kwargs)
            finally:
                # Re-arm the outer alarm, or cancel if there was none. The
                # outer deadline is extended by however long this call took.
                signal.alarm(previous_remaining)
                # signal.signal() returns None for handlers not installed from
                # Python; those cannot be passed back in.
                if previous_handler is not None:
                    signal.signal(signal.SIGALRM, previous_handler)
        return wrapped

    return inner
2023-07-30 00:42:06 +00:00
# Single-character clean-ups applied by remove_cuniform(): None deletes the
# character, a string replaces it.
_CUNIFORM_TRANSLATION = str.maketrans({
    '\u200e': None,
    '\u200b': None,
    '\u202e': None,
    '\ufeff': None,
    '\u033f': None,
    '\u0589': ':',
    '𒐪': None,
    '𒐫': None,
    '﷽': None,
    '’': "'",
})


def remove_cuniform(sanitized):
    """Strip a fixed set of problem characters from user text and normalise it.

    Deletes the characters mapped to ``None`` in the translation table,
    turns U+0589 into ``:`` and the typographic apostrophe into ``'``,
    converts CRLF to LF, and trims surrounding whitespace.

    :param sanitized: raw text; may be ``None`` or empty.
    :returns: the cleaned text, or ``""`` for falsy input.
    """
    if not sanitized:
        return ""

    cleaned = sanitized.translate(_CUNIFORM_TRANSLATION)
    # Done after the deletions so that "\r<deleted char>\n" also collapses.
    cleaned = cleaned.replace("\r\n", "\n")
    return cleaned.strip()
2022-10-05 08:04:32 +00:00
2023-07-10 00:38:02 +00:00
def get_youtube_id_and_t(url):
    """Pull the video id and an optional inline timestamp out of a YouTube URL.

    The id is the first ``v`` query parameter. Some links are malformed as
    ``...?v=ID?t=SECONDS``; when the value contains exactly one ``?t=``, the
    part after it is returned as the timestamp. Anything after a ``?`` is
    dropped from the id.

    :param url: the link to inspect.
    :returns: ``(video_id, t)``; ``(None, None)`` when there is no ``v``
        parameter, and ``t`` is ``None`` when no inline timestamp was found.
    """
    query = parse_qs(urlparse(url).query, keep_blank_values=True)

    values = query.get('v')
    if not values:
        return (None, None)

    video = values[0]
    timestamp = None

    pieces = video.split('?t=')
    if len(pieces) == 2:
        video, timestamp = pieces

    return (video.split('?')[0], timestamp)
2023-02-24 00:46:39 +00:00
2023-07-10 00:38:02 +00:00
# Optional hours/minutes/seconds with unit letters, e.g. "90", "90s", "1m30s",
# "1m", "1h2m3s". Matches the empty string too (all groups None).
_YT_TIMESTAMP_REGEX = re.compile(r'(?:(\d+)h)?(?:(\d+)m)?(?:(\d+)s?)?', flags=re.I)


def _youtube_start_seconds(t):
    """Convert a YouTube ``t``/``start`` value to whole seconds; 0 if it cannot be parsed."""
    if not isinstance(t, str):
        return int(t)
    match = _YT_TIMESTAMP_REGEX.fullmatch(t.strip())
    if not match:
        return 0
    hours, minutes, seconds = (int(part) if part else 0 for part in match.groups())
    return hours * 3600 + minutes * 60 + seconds


def handle_youtube_links(url):
    """Build the ``<lite-youtube>`` embed for a YouTube URL.

    The start time comes from an inline ``?t=`` inside the ``v`` value, else
    the ``t`` query parameter, else ``start``. The value is user-controlled, so
    an unparseable timestamp is treated as "no start time" instead of raising.

    :param url: the YouTube link; ``&amp;`` entities are decoded first.
    :returns: the embed HTML, or ``None`` if the URL has no video id or the id
        does not fully match ``yt_id_regex``.
    """
    url = url.replace('&amp;', '&')

    video_id, t = get_youtube_id_and_t(url)
    if not video_id or not yt_id_regex.fullmatch(video_id):
        return None

    if not t:
        params = parse_qs(urlparse(url).query, keep_blank_values=True)
        t = params.get('t', params.get('start', [0]))[0]

    start = _youtube_start_seconds(t)

    html = f'<lite-youtube videoid="{video_id}" params="autoplay=1&modestbranding=1'
    if start:
        html += f'&start={start}'
    html += '"></lite-youtube>'
    return html
2022-12-15 19:31:30 +00:00
@with_sigalrm_timeout ( 10 )
2023-10-15 13:15:39 +00:00
def sanitize ( sanitized , golden = True , limit_pings = 0 , showmore = False , count_emojis = False , snappy = False , chat = False , blackjack = None , commenters_ping_post_id = None , obj = None , author = None ) :
2023-06-30 20:34:29 +00:00
def error ( error ) :
if chat :
return error , 403
else :
abort ( 403 , error )
2023-07-29 19:13:37 +00:00
sanitized = html_comment_regex . sub ( ' ' , sanitized )
sanitized = remove_cuniform ( sanitized )
2023-03-26 12:57:03 +00:00
if not sanitized : return ' '
2022-06-18 15:53:34 +00:00
2023-10-06 19:10:06 +00:00
v = getattr ( g , ' v ' , None )
if blackjack and execute_blackjack ( v , None , sanitized , blackjack ) :
2023-10-07 15:06:08 +00:00
return ' <p>g</p> '
2023-02-07 03:31:49 +00:00
2023-10-15 13:54:29 +00:00
if obj and not obj . is_effortpost :
2023-10-07 15:35:16 +00:00
if author . owoify :
2023-10-13 18:56:48 +00:00
sanitized = owoify ( sanitized , author . chud_phrase )
2023-10-13 18:17:50 +00:00
if author . marsify :
2023-10-13 18:49:21 +00:00
sanitized = marsify ( sanitized , author . chud_phrase )
2023-10-11 20:18:52 +00:00
2023-10-11 20:53:12 +00:00
if obj and obj . sharpened :
2023-10-26 23:16:49 +00:00
sanitized = sharpen ( sanitized , author . chud_phrase )
2023-10-11 20:53:12 +00:00
2022-05-27 18:28:54 +00:00
if ' ``` ' not in sanitized and ' <pre> ' not in sanitized :
2022-05-08 09:06:01 +00:00
sanitized = linefeeds_regex . sub ( r ' \ 1 \ n \ n \ 2 ' , sanitized )
2022-05-04 23:09:46 +00:00
2022-06-19 15:22:06 +00:00
sanitized = greentext_regex . sub ( r ' \ 1<g> \ > \ 2</g> ' , sanitized )
2023-06-24 14:25:05 +00:00
sanitized = image_sub_regex . sub ( r ' ![]( \ 1) ' , sanitized )
2022-05-04 23:09:46 +00:00
sanitized = image_check_regex . sub ( r ' \ 1 ' , sanitized )
2022-06-25 05:28:43 +00:00
sanitized = link_fix_regex . sub ( r ' \ 1https:// \ 2 ' , sanitized )
2022-05-07 05:28:51 +00:00
2022-07-20 00:07:38 +00:00
if FEATURES [ ' MARKUP_COMMANDS ' ] :
sanitized = command_regex . sub ( command_regex_matcher , sanitized )
2022-07-11 12:14:18 +00:00
2023-02-01 15:59:10 +00:00
sanitized = numbered_list_regex . sub ( r ' \ 1 \ . ' , sanitized )
2022-06-28 05:52:29 +00:00
sanitized = strikethrough_regex . sub ( r ' \ 1<del> \ 2</del> ' , sanitized )
2023-06-30 19:47:30 +00:00
sanitized = sanitized . replace ( ' _ ' , ' ▔ ' )
2023-03-12 09:30:22 +00:00
sanitized = markdown ( sanitized )
2023-07-01 21:22:13 +00:00
sanitized = sanitized . replace ( ' ▔ ' , ' _ ' ) . replace ( ' %E 2 %96% 94 ' , ' _ ' )
2023-03-12 09:30:22 +00:00
2023-10-26 16:55:29 +00:00
if obj and obj . queened :
sanitized = queenify_html ( sanitized )
2023-07-21 14:44:33 +00:00
sanitized = sanitized . replace ( ' <a href= " / % 21 ' , ' <a href= " /! ' )
2023-09-08 17:56:23 +00:00
sanitized = reddit_mention_regex . sub ( r ' <a href= " https://old.reddit.com/ \ 1 " rel= " nofollow noopener " target= " _blank " >/ \ 1</a> ' , sanitized )
2023-09-21 19:37:29 +00:00
sanitized = hole_mention_regex . sub ( r ' <a href= " / \ 1 " >/ \ 1</a> ' , sanitized )
2022-06-22 22:12:47 +00:00
2023-03-12 14:54:03 +00:00
names = set ( m . group ( 1 ) for m in mention_regex . finditer ( sanitized ) )
2023-06-30 20:34:29 +00:00
2023-08-31 10:46:07 +00:00
if limit_pings and len ( names ) > limit_pings and v . admin_level < PERMS [ ' POST_COMMENT_INFINITE_PINGS ' ] :
2023-06-30 20:34:29 +00:00
error ( " Max ping limit is 5 for comments and 50 for posts! " )
2022-08-21 17:20:09 +00:00
users_list = get_users ( names , graceful = True )
users_dict = { }
for u in users_list :
users_dict [ u . username . lower ( ) ] = u
if u . original_username :
users_dict [ u . original_username . lower ( ) ] = u
2023-05-13 04:53:14 +00:00
if u . prelock_username :
users_dict [ u . prelock_username . lower ( ) ] = u
2022-08-21 17:20:09 +00:00
def replacer ( m ) :
2023-03-12 14:54:03 +00:00
u = users_dict . get ( m . group ( 1 ) . lower ( ) )
2023-09-08 17:06:36 +00:00
if not u or ( v and u . id in v . all_twoway_blocks ) or ( v and u . has_muted ( v ) ) :
2022-08-21 17:20:09 +00:00
return m . group ( 0 )
2023-07-22 18:12:04 +00:00
return f ' <a href= " /id/ { u . id } " ><img loading= " lazy " src= " /pp/ { u . id } " >@ { u . username } </a> '
2022-08-21 17:20:09 +00:00
sanitized = mention_regex . sub ( replacer , sanitized )
2022-05-04 23:09:46 +00:00
2023-02-25 22:06:49 +00:00
if FEATURES [ ' PING_GROUPS ' ] :
2023-04-25 06:59:20 +00:00
def group_replacer ( m ) :
2023-10-15 15:34:51 +00:00
name = m . group ( 1 )
2023-04-25 06:59:20 +00:00
2023-03-01 05:32:19 +00:00
if name == ' everyone ' :
2023-04-25 06:59:20 +00:00
return f ' <a href= " /users " >! { name } </a> '
2023-07-21 14:27:45 +00:00
elif name == ' jannies ' :
return f ' <a href= " /admins " >! { name } </a> '
2023-09-05 18:24:10 +00:00
elif name == ' commenters ' and commenters_ping_post_id :
return f ' <a href= " /!commenters/ { commenters_ping_post_id } / { int ( time . time ( ) ) } " >! { name } </a> '
2023-08-11 13:34:56 +00:00
elif name == ' followers ' :
2023-10-06 19:10:06 +00:00
return f ' <a href= " /id/ { v . id } /followers " >! { name } </a> '
2023-10-15 15:34:51 +00:00
elif g . db . get ( Group , name . lower ( ) ) :
return f ' <a href= " /! { name . lower ( ) } " >! { name } </a> '
2023-03-01 05:32:19 +00:00
else :
2023-04-25 06:59:20 +00:00
return m . group ( 0 )
sanitized = group_mention_regex . sub ( group_replacer , sanitized )
2023-02-25 22:06:49 +00:00
2022-05-04 23:09:46 +00:00
soup = BeautifulSoup ( sanitized , ' lxml ' )
for tag in soup . find_all ( " img " ) :
2023-08-09 09:26:44 +00:00
if tag . get ( " src " ) and not tag [ " src " ] . startswith ( ' /pp/ ' ) and not ( snappy and tag [ " src " ] . startswith ( f ' { SITE_FULL_IMAGES } /e/ ' ) ) :
2022-07-12 20:30:00 +00:00
if not is_safe_url ( tag [ " src " ] ) :
2022-11-21 17:37:38 +00:00
a = soup . new_tag ( " a " , href = tag [ " src " ] , rel = " nofollow noopener " , target = " _blank " )
2022-07-12 20:30:00 +00:00
a . string = tag [ " src " ]
tag . replace_with ( a )
continue
2023-09-07 15:26:31 +00:00
2023-08-09 09:28:21 +00:00
del tag [ " g " ]
del tag [ " glow " ]
del tag [ " party " ]
2022-07-12 20:30:00 +00:00
2022-05-04 23:09:46 +00:00
tag [ " loading " ] = " lazy "
tag [ " data-src " ] = tag [ " src " ]
2023-03-19 16:28:19 +00:00
tag [ " src " ] = f " { SITE_FULL_IMAGES } /i/l.webp "
2023-03-12 13:02:31 +00:00
tag [ ' alt ' ] = tag [ " data-src " ]
2023-03-12 13:13:28 +00:00
tag [ ' class ' ] = " img "
2022-07-02 10:44:05 +00:00
2022-07-02 00:25:58 +00:00
if tag . parent . name != ' a ' :
2022-07-02 10:44:05 +00:00
a = soup . new_tag ( " a " , href = tag [ " data-src " ] )
if not is_site_url ( a [ " href " ] ) :
2022-10-29 21:46:30 +00:00
a [ " rel " ] = " nofollow noopener "
2022-11-21 17:37:38 +00:00
a [ " target " ] = " _blank "
2022-07-02 00:25:58 +00:00
tag = tag . replace_with ( a )
a . append ( tag )
2022-06-27 01:00:45 +00:00
2023-06-07 19:47:14 +00:00
tag [ " data-src " ] = tag [ " data-src " ]
2023-08-12 11:04:31 +00:00
tag [ " data-user-submitted " ] = " "
2023-01-01 11:30:33 +00:00
2023-06-07 05:29:43 +00:00
sanitized = str ( soup ) . replace ( ' <html><body> ' , ' ' ) . replace ( ' </body></html> ' , ' ' ) . replace ( ' /> ' , ' > ' )
2022-07-05 22:11:45 +00:00
2022-05-04 23:09:46 +00:00
sanitized = spoiler_regex . sub ( r ' <spoiler> \ 1</spoiler> ' , sanitized )
2022-07-05 22:11:45 +00:00
2023-03-19 08:33:04 +00:00
emojis_used = set ( )
2022-05-04 23:09:46 +00:00
emojis = list ( emoji_regex . finditer ( sanitized ) )
2022-09-16 16:30:34 +00:00
if len ( emojis ) > 20 : golden = False
2022-05-04 23:09:46 +00:00
captured = [ ]
for i in emojis :
if i . group ( 0 ) in captured : continue
captured . append ( i . group ( 0 ) )
old = i . group ( 0 )
2023-04-27 18:06:44 +00:00
if ' marseylong1 ' in old or ' marseylong2 ' in old or ' marseylongcockandballs ' in old or ' marseyllama1 ' in old or ' marseyllama2 ' in old :
2023-04-23 13:15:29 +00:00
new = old . lower ( ) . replace ( " > " , " class= ' mb-0 ' > " )
2022-05-04 23:09:46 +00:00
else : new = old . lower ( )
2023-03-19 08:33:04 +00:00
new = render_emoji ( new , emoji_regex2 , golden , emojis_used , True )
2022-05-04 23:09:46 +00:00
sanitized = sanitized . replace ( old , new )
emojis = list ( emoji_regex2 . finditer ( sanitized ) )
2022-09-16 16:30:34 +00:00
if len ( emojis ) > 20 : golden = False
2022-05-04 23:09:46 +00:00
2023-03-19 08:33:04 +00:00
sanitized = render_emoji ( sanitized , emoji_regex2 , golden , emojis_used )
2022-05-04 23:09:46 +00:00
2022-05-22 10:20:11 +00:00
sanitized = sanitized . replace ( ' & ' , ' & ' )
2022-05-04 23:09:46 +00:00
2023-06-24 14:25:05 +00:00
sanitized = video_sub_regex . sub ( r ' <p class= " resizable " ><video controls preload= " none " src= " \ 1 " ></video></p> ' , sanitized )
sanitized = audio_sub_regex . sub ( r ' <audio controls preload= " none " src= " \ 1 " ></audio> ' , sanitized )
2022-05-04 23:09:46 +00:00
2023-03-19 08:33:04 +00:00
if count_emojis :
2023-08-05 19:26:42 +00:00
for emoji in g . db . query ( Emoji ) . filter ( Emoji . submitter_id == None , Emoji . name . in_ ( emojis_used ) ) :
2023-03-19 08:33:04 +00:00
emoji . count + = 1
g . db . add ( emoji )
2022-05-04 23:09:46 +00:00
2023-09-29 07:15:29 +00:00
if obj :
for emoji in emojis_used :
if emoji in OVER_18_EMOJIS :
2023-10-05 10:19:50 +00:00
obj . nsfw = True
2023-09-29 07:15:29 +00:00
break
2022-05-15 08:45:57 +00:00
sanitized = sanitized . replace ( ' <p></p> ' , ' ' )
2022-05-04 23:09:46 +00:00
2023-07-31 23:17:19 +00:00
allowed_css_properties = allowed_styles . copy ( )
2023-10-06 19:10:06 +00:00
if v and v . chud :
2023-09-07 15:26:31 +00:00
allowed_css_properties . remove ( ' filter ' )
2023-03-24 11:31:12 +00:00
css_sanitizer = CSSSanitizer ( allowed_css_properties = allowed_css_properties )
2022-05-04 23:09:46 +00:00
sanitized = bleach . Cleaner ( tags = allowed_tags ,
attributes = allowed_attributes ,
protocols = [ ' http ' , ' https ' ] ,
2022-05-25 00:27:41 +00:00
css_sanitizer = css_sanitizer ,
2022-07-05 22:11:45 +00:00
filters = [ partial ( LinkifyFilter , skip_tags = [ " pre " ] ,
2023-10-07 14:58:45 +00:00
parse_email = False , url_re = sanitize_url_regex ) ]
2022-05-04 23:09:46 +00:00
) . clean ( sanitized )
2023-05-12 19:12:02 +00:00
#doing this here cuz of the linkifyfilter right above it (therefore unifying all link processing logic)
2022-05-04 23:09:46 +00:00
soup = BeautifulSoup ( sanitized , ' lxml ' )
2023-09-24 19:25:12 +00:00
has_transform = bool ( soup . select ( ' [style*=transform i] ' ) )
2023-09-22 06:51:45 +00:00
2022-05-04 23:09:46 +00:00
links = soup . find_all ( " a " )
2023-10-06 19:10:06 +00:00
if v and v . admin_level > = PERMS [ " IGNORE_DOMAIN_BAN " ] :
2023-06-02 13:48:58 +00:00
banned_domains = [ ]
else :
2023-08-11 13:15:34 +00:00
banned_domains = [ x . domain for x in g . db . query ( BannedDomain . domain ) ]
2022-05-04 23:09:46 +00:00
for link in links :
2023-05-12 19:12:02 +00:00
#remove empty links
if not link . contents or not str ( link . contents [ 0 ] ) . strip ( ) :
link . extract ( )
continue
2022-05-04 23:09:46 +00:00
href = link . get ( " href " )
if not href : continue
2023-05-12 19:12:02 +00:00
2023-08-05 21:23:18 +00:00
link [ " href " ] = normalize_url ( href )
if link . string == href :
link . string = link [ " href " ]
href = link [ " href " ]
2023-08-05 18:51:05 +00:00
2023-09-24 19:27:30 +00:00
def unlinkfy ( ) :
2023-05-12 19:30:47 +00:00
link . string = href
del link [ " href " ]
2023-05-13 02:53:51 +00:00
#\ in href right after / makes most browsers ditch site hostname and allows for a host injection bypassing the check, see <a href="/\google.com">cool</a>
if " \\ " in href :
2023-09-24 19:27:30 +00:00
unlinkfy ( )
continue
2023-05-12 19:12:02 +00:00
2023-05-12 19:30:47 +00:00
#don't allow something like this https://rdrama.net/post/78376/reminder-of-the-fact-that-our/2150032#context
2023-05-13 23:03:34 +00:00
domain = tldextract . extract ( href ) . registered_domain
2023-05-12 19:30:47 +00:00
if domain and not allowed_domain_regex . fullmatch ( domain ) :
2023-09-24 19:27:30 +00:00
unlinkfy ( )
continue
2023-05-13 02:53:51 +00:00
#check for banned domain
combined = ( domain + urlparse ( href ) . path ) . lower ( )
if any ( ( combined . startswith ( x ) for x in banned_domains ) ) :
2023-09-24 19:27:30 +00:00
unlinkfy ( )
continue
2023-05-13 02:53:51 +00:00
2023-05-14 16:45:58 +00:00
#don't allow something like this [@Aevann2](https://iplogger.org/1fRKk7)
if str ( link . string ) . startswith ( ' @ ' ) and not href . startswith ( ' / ' ) :
2023-09-24 19:27:30 +00:00
unlinkfy ( )
continue
2023-05-14 16:45:58 +00:00
#don't allow something like this [!jannies](https://iplogger.org/1fRKk7)
if str ( link . string ) . startswith ( ' ! ' ) and not href . startswith ( ' / ' ) :
2023-09-24 19:27:30 +00:00
unlinkfy ( )
continue
2023-05-14 16:45:58 +00:00
2023-05-13 02:53:51 +00:00
#don't allow something like this [https://rԁ rama.net/leaderboard](https://iplogger.org/1fRKk7)
2023-10-15 13:15:39 +00:00
if not snappy :
2023-06-08 05:22:21 +00:00
string_domain = tldextract . extract ( str ( link . string ) ) . registered_domain
if string_domain and string_domain != tldextract . extract ( href ) . registered_domain :
link . string = href
2023-05-12 19:12:02 +00:00
#insert target="_blank" and rel="nofollow noopener" for external links
if not href . startswith ( ' / ' ) and not href . startswith ( f ' { SITE_FULL } / ' ) :
link [ " target " ] = " _blank "
link [ " rel " ] = " nofollow noopener "
2023-09-22 06:51:45 +00:00
if has_transform :
del link [ " href " ]
2023-09-21 19:12:23 +00:00
2023-06-07 05:29:43 +00:00
sanitized = str ( soup ) . replace ( ' <html><body> ' , ' ' ) . replace ( ' </body></html> ' , ' ' ) . replace ( ' /> ' , ' > ' )
2022-05-04 23:09:46 +00:00
2023-08-06 02:47:12 +00:00
captured = [ ]
for i in youtube_regex . finditer ( sanitized ) :
if i . group ( 0 ) in captured : continue
captured . append ( i . group ( 0 ) )
html = handle_youtube_links ( i . group ( 1 ) )
if html :
2023-08-29 11:05:10 +00:00
if not chat :
html = f ' <p class= " resizable yt " > { html } </p> '
2023-08-06 02:47:12 +00:00
sanitized = sanitized . replace ( i . group ( 0 ) , html )
2023-02-07 03:31:49 +00:00
if ' <pre> ' not in sanitized and blackjack != " rules " :
2022-06-30 23:01:10 +00:00
sanitized = sanitized . replace ( ' \n ' , ' ' )
2022-06-29 00:55:44 +00:00
2023-03-11 07:36:41 +00:00
if showmore :
2023-01-22 23:27:24 +00:00
# Insert a show more button if the text is too long or has too many paragraphs
2023-01-24 03:56:14 +00:00
CHARLIMIT = 3000
2023-01-22 23:27:24 +00:00
pos = 0
for _ in range ( 20 ) :
2023-02-24 07:29:46 +00:00
pos = sanitized . find ( ' </p> ' , pos + 4 )
2023-01-22 23:27:24 +00:00
if pos < 0 :
break
2023-01-24 03:56:14 +00:00
if ( pos < 0 and len ( sanitized ) > CHARLIMIT ) or pos > CHARLIMIT :
pos = CHARLIMIT - 500
2023-01-22 23:27:24 +00:00
if pos > = 0 :
2023-03-10 23:30:42 +00:00
sanitized = ( sanitized [ : pos ] + showmore_regex . sub ( r ' \ 1<p><button class= " showmore " >SHOW MORE</button></p><d class= " d-none " > \ 2</d> ' , sanitized [ pos : ] , count = 1 ) )
2022-05-04 23:09:46 +00:00
2023-08-12 11:00:52 +00:00
if " style " in sanitized and " filter " in sanitized :
if sanitized . count ( " blur( " ) + sanitized . count ( " drop-shadow( " ) > 5 :
2023-10-07 19:16:40 +00:00
error ( " Max 5 usages of ' blur ' and ' drop-shadow ' ! " )
2023-08-12 11:00:52 +00:00
2022-07-02 10:12:52 +00:00
return sanitized . strip ( )
2022-05-04 23:09:46 +00:00
def allowed_attributes_emojis(tag, name, value):
    """Decide whether one HTML attribute may survive emoji-only sanitisation.

    Has the ``(tag, name, value)`` shape of a bleach ``attributes`` callable
    and is passed as such by ``filter_emojis_only``.

    Args:
        tag: Name of the element carrying the attribute.
        name: Attribute name.
        value: Attribute value.

    Returns:
        True if the attribute should be kept, False if it should be dropped.
    """
    if tag == 'img':
        if name == 'src':
            # A backslash can make browsers discard the site hostname
            # ("/\evil.com"), and a leading "//" is a protocol-relative URL
            # pointing at another host; neither is a same-site path.
            # NOTE(review): assumes SITE_FULL_IMAGES carries an explicit
            # scheme (is not itself protocol-relative) - confirm.
            if '\\' in value or value.startswith('//'):
                return False
            if value.startswith('/'):
                return True
            if value.startswith(f'{SITE_FULL_IMAGES}/'):
                return True
            return False
        if name == 'loading' and value == 'lazy':
            return True
        if name == 'data-bs-toggle' and value == 'tooltip':
            return True
        # Flag-style attributes are accepted only when they carry no value.
        if name in {'g', 'glow', 'party'} and not value:
            return True
        if name in {'alt', 'title'}:
            return True

    if tag == 'span':
        if name == 'data-bs-toggle' and value == 'tooltip':
            return True
        if name in {'title', 'alt'}:
            return True
        if name == 'cide' and not value:
            return True

    return False
2023-08-04 13:25:25 +00:00
@with_sigalrm_timeout(2)
def filter_emojis_only(title, golden=True, count_emojis=False, obj=None, author=None):
    """Render a plain-text title into sanitised HTML (emojis, <del>, <span> only).

    Args:
        title: Raw title text.
        golden: Forwarded unchanged to ``render_emoji``.
        count_emojis: When true, bump ``count`` on every used ``Emoji`` row
            whose ``submitter_id`` is NULL and add it to ``g.db``.
        obj: Optional object the title belongs to. Its ``is_effortpost`` and
            ``sharpened`` attributes are read, and ``nsfw`` is set to True
            when any used emoji is in ``OVER_18_EMOJIS``.
        author: Supplies ``owoify``, ``marsify`` and ``chud_phrase``. Must be
            given whenever ``obj`` is, since those attributes are read
            unconditionally on that path.

    Returns:
        The rendered, bleach-cleaned and stripped title HTML.

    Raises:
        Whatever ``abort(400, ...)`` raises when the rendered title is longer
        than ``POST_TITLE_HTML_LENGTH_LIMIT``.
    """
    title = title.replace("\n", "").replace("\r", "").replace("\t", "")
    # Escape angle brackets before any markup is generated, so text typed by
    # the user can never become a tag; only tags produced below reach bleach.
    title = title.replace('<', '&lt;').replace('>', '&gt;')
    title = remove_cuniform(title)

    if obj and not obj.is_effortpost:
        if author.owoify:
            title = owoify(title, author.chud_phrase)
        if author.marsify:
            title = marsify(title, author.chud_phrase)

    if obj and obj.sharpened:
        title = sharpen(title, author.chud_phrase)

    # Passed to render_emoji and read back afterwards - presumably filled in
    # with the names of the emojis it rendered.
    emojis_used = set()
    title = render_emoji(title, emoji_regex2, golden, emojis_used, is_title=True)

    # Nothing to count when no emoji was used, so skip the query entirely.
    if count_emojis and emojis_used:
        # `== None` is intentional: SQLAlchemy overloads it to build IS NULL.
        used_rows = g.db.query(Emoji).filter(
            Emoji.submitter_id == None,
            Emoji.name.in_(emojis_used),
        )
        for emoji in used_rows:
            emoji.count += 1
            g.db.add(emoji)

    if obj and any(name in OVER_18_EMOJIS for name in emojis_used):
        obj.nsfw = True

    title = strikethrough_regex.sub(r'\1<del>\2</del>', title)

    title = bleach.clean(
        title,
        tags=['img', 'del', 'span'],
        attributes=allowed_attributes_emojis,
        protocols=['http', 'https'],
    ).replace('\n', '')

    if len(title) > POST_TITLE_HTML_LENGTH_LIMIT:
        abort(400, "Rendered title is too big!")

    return title.strip()
2022-05-25 08:43:16 +00:00
2023-08-05 18:51:05 +00:00
# Query-string keys that are kept on every domain (matched case-sensitively).
_WHITELISTED_QUERY_KEYS = frozenset({
    '_x_tr_hl', '_x_tr_pto', '_x_tr_sl', '_x_tr_tl', 'abstract_id', 'after',
    'article', 'bill_id', 'c', 'clip', 'commentID', 'comments', 'context',
    'count', 'f', 'fbid', 'format', 'forum_id', 'i', 'ID', 'id', 'lb', 'list',
    'oldid', 'p', 'page', 'post_id', 'postid', 'q', 'run', 'scrollToComments',
    'search', 'sl', 'sp', 'story_fbid', 'tab', 'term', 'text', 'thread_id',
    'threadid', 'ticket_form_id', 'time_continue', 'title', 'title_no', 'tl',
    'token', 'topic', 'type', 'tz1', 'tz2', 'u', 'udca', 'url', 'v', 'vid',
    'viewkey',
})


def is_whitelisted(domain, k):
    """Tell whether query parameter *k* should be kept for a URL on *domain*.

    Args:
        domain: Network location of the URL (``normalize_url`` passes the
            parsed ``netloc``).
        k: Query-string key.

    Returns:
        True if the parameter should be preserved, False if it should be
        dropped.
    """
    # Every parameter is kept on pullpush.io and its subdomains. Matching on
    # the dot boundary stops look-alikes such as "notpullpush.io".
    if domain == 'pullpush.io' or domain.endswith('.pullpush.io'):
        return True

    lowered = k.lower()
    if 'sort' in lowered or 'query' in lowered:
        return True

    if k in _WHITELISTED_QUERY_KEYS:
        return True

    # 't' and 'oid' are kept everywhere except on the one site named here.
    if k == 't' and domain != 'twitter.com':
        return True
    if k == 'oid' and domain != 'quora.com':
        return True

    return False
2022-06-10 20:02:15 +00:00
# Ordered (old, new) substring rewrites applied by normalize_url. Order
# matters: later entries match the output of earlier ones (e.g. the
# "youtube.com/shorts/" rule relies on "www."/"m." having been removed).
# NOTE(review): these are plain substring matches, so a short prefix such as
# "https://x.com" also matches hosts like "https://x.company.org" - confirm
# whether that is acceptable.
_URL_REWRITES = (
    ("https://music.youtube.com/watch?v=", "https://youtube.com/watch?v="),
    ("https://www.youtube.com", "https://youtube.com"),
    ("https://m.youtube.com", "https://youtube.com"),
    ("https://youtube.com/shorts/", "https://youtube.com/watch?v="),
    ("https://youtube.com/live/", "https://youtube.com/watch?v="),
    ("https://youtube.com/v/", "https://youtube.com/watch?v="),
    ("https://mobile.twitter.com", "https://twitter.com"),
    ("https://x.com", "https://twitter.com"),
    ("https://www.twitter.com", "https://twitter.com"),
    ("https://nitter.net/", "https://twitter.com/"),
    ("https://nitter.42l.fr/", "https://twitter.com/"),
    ("https://m.facebook.com", "https://facebook.com"),
    ("https://en.m.wikipedia.org", "https://en.wikipedia.org"),
    ("https://www.instagram.com", "https://instagram.com"),
    ("https://www.tiktok.com", "https://tiktok.com"),
    ("https://imgur.com/", "https://i.imgur.com/"),
    ("/giphy.gif", "/giphy.webp"),
    ('https://www.google.com/amp/s/', 'https://'),
    ('https://amp.', 'https://'),
    ('https://cnn.com/cnn/', 'https://edition.cnn.com/'),
    ('/amp/', '/'),
    ('https://letmegooglethat.com/?q=', 'https://google.com/search?q='),
    ('https://lmgtfy.app/?q=', 'https://google.com/search?q='),
)


def normalize_url(url):
    """Rewrite *url* into the canonical form used by the site.

    Steps, in order: percent-decode; apply the reddit, mirror/mobile-host and
    AMP rewrites; for URLs that are neither site-relative nor on
    rdrama.net / watchpeopledie.tv, force the https scheme and drop every
    query parameter ``is_whitelisted`` rejects (youtu.be links become
    youtube.com/watch?v=...); strip trailing slashes; apply the imgur rewrite.

    Args:
        url: The URL string to normalise.

    Returns:
        The normalised URL string.

    Raises:
        Whatever ``abort(500)`` raises when ``urlparse`` rejects the URL.
    """
    url = unquote(url)
    url = url.replace("reddit.com/user/", "reddit.com/u/")
    url = reddit_domain_regex.sub(r'\1https://old.reddit.com/\5', url)

    for old, new in _URL_REWRITES:
        url = url.replace(old, new)
    url = url.replace(DONATE_LINK, f'{SITE_FULL}/donate')

    # Drop only the trailing ".amp"; splitting on the first ".amp" would cut
    # the URL short whenever ".amp" also occurs earlier in it.
    url = url.removesuffix('.amp')

    url = giphy_regex.sub(r'\1.webp', url)

    is_external = not url.startswith(('/', 'https://rdrama.net', 'https://watchpeopledie.tv'))
    if is_external:
        try:
            parsed_url = urlparse(url)
        except ValueError:
            # urlparse raises ValueError on malformed input (e.g. a broken
            # IPv6 literal); log the offending URL and fail the request.
            print(url, flush=True)
            abort(500)

        netloc = parsed_url.netloc
        url_path = parsed_url.path.rstrip('/')
        query_params = parse_qs(parsed_url.query, keep_blank_values=True)

        filtered = {}

        if netloc == 'youtu.be':
            # Short links carry the video id in the path.
            filtered['v'] = url_path.lstrip('/')
            netloc = 'youtube.com'
            url_path = '/watch'

        filtered |= {k: val for k, val in query_params.items() if is_whitelisted(netloc, k)}

        if netloc == 'old.reddit.com' and reddit_comment_link_regex.fullmatch(url):
            filtered['context'] = 8

        url = urlunparse(ParseResult(
            scheme="https",
            netloc=netloc,
            path=url_path,
            params=parsed_url.params,
            query=urlencode(filtered, doseq=True),
            fragment=parsed_url.fragment,
        ))

    # NOTE(review): applied to every URL, including site-relative ones -
    # confirm this statement's nesting against the upstream file.
    url = url.rstrip('/')

    url = imgur_regex.sub(r'\1_d.webp?maxwidth=9999&fidelity=grand', url)

    return url
2022-08-05 17:09:41 +00:00
2023-10-06 19:22:11 +00:00
def normalize_url_gevent(url):
    """Fetch *url* and normalise the address the response reports.

    Performs a GET with the shared ``HEADERS`` and a 2-second timeout, reads
    the response's ``url`` attribute and hands it to ``normalize_url``.
    Exceptions raised by ``requests`` are not caught here.
    """
    resolved = requests.get(url, headers=HEADERS, timeout=2).url
    return normalize_url(resolved)
2022-08-05 17:09:41 +00:00
def validate_css(css):
    """Check user-supplied CSS against the site's restrictions.

    Rejects ``@import`` rules, CSS comments, and any URL captured by
    ``css_url_regex`` that ``is_safe_url`` does not accept.

    Args:
        css: The stylesheet text.

    Returns:
        A ``(ok, message)`` tuple: ``(True, "")`` when the CSS is accepted,
        otherwise ``(False, <reason>)``.
    """
    # At-rule names are ASCII case-insensitive in CSS, so "@IMPORT" must be
    # caught as well as "@import".
    if '@import' in css.lower():
        return False, "CSS @import statements are not allowed!"

    if '/*' in css:
        return False, "CSS comments are not allowed!"

    for match in css_url_regex.finditer(css):
        url = match.group(1)
        if not is_safe_url(url):
            domain = tldextract.extract(url).registered_domain
            return False, f"The domain '{domain}' is not allowed here!"

    return True, ""
2023-03-22 21:39:25 +00:00
2023-06-23 13:06:45 +00:00
def torture_chud(string, username):
    """Apply the chud text substitutions to *string*.

    First performs every literal replacement in ``CHUD_REPLACEMENTS``, then
    runs the three ``torture_regex*`` patterns, each of which re-emits its
    groups 1 and 3 around ``@username`` (plain, followed by " is", or
    followed by "'s").

    Args:
        string: Text to transform; falsy values are returned unchanged.
        username: Name inserted after the ``@``.

    Returns:
        The transformed text, or *string* itself when it is falsy.
    """
    if not string:
        return string

    for needle, replacement in CHUD_REPLACEMENTS.items():
        string = string.replace(needle, replacement)

    # The username is spliced into a regex replacement template, where a
    # backslash would be read as an escape / group reference; double it so
    # the name is always inserted literally.
    safe_username = username.replace('\\', '\\\\')

    string = torture_regex.sub(rf'\1@{safe_username}\3', string)
    string = torture_regex2.sub(rf'\1@{safe_username} is\3', string)
    string = torture_regex3.sub(rf"\1@{safe_username}'s\3", string)

    return string
2023-10-07 15:35:16 +00:00
# Paragraph openers that exempt a body from rewriting. "&&" is listed both
# raw and as "&amp;&amp;", the form a bare ampersand takes once the HTML has
# been serialised by BeautifulSoup.
_CHUD_SKIP_MARKERS = ('<p>&&', '<p>&amp;&amp;', '<p>$$', '<p>##')


def _chud_body(html):
    """Parse *html* with lxml; return ``(soup, body)``, body None if absent."""
    soup = BeautifulSoup(html, 'lxml')
    body = soup.html.body if soup.html else None
    return soup, body


def complies_with_chud(obj):
    """Rewrite a chudded post/comment and report whether it complies.

    Side effects (only when no early exit applies): ``obj.body_html`` and,
    for ``Post`` objects, ``obj.title_html`` are passed through
    ``torture_chud``.

    Args:
        obj: A ``Post`` or a comment-like object exposing ``chudded``,
            ``author``, ``body_html`` and, for non-posts, ``parent_post``
            and ``post``.

    Returns:
        True when the object is exempt or the author's ``chud_phrase`` is
        found (or the author is not chudded, on the paths where that is
        checked); False otherwise.
    """
    # check for cases where we can leave right away
    if not obj.chudded:
        return True
    if obj.author.hieroglyphs:
        return True

    if isinstance(obj, Post):
        if obj.id in ADMIGGER_THREADS:
            return True
        if obj.hole == "chudrama":
            return True
    elif obj.parent_post:
        if obj.parent_post in ADMIGGER_THREADS:
            return True
        if obj.post.hole == "chudrama":
            return True

    author = obj.author

    # preserve the old body_html; the chud phrase is looked up in it below
    old_body_html = obj.body_html

    # torture body_html
    if old_body_html and not any(marker in old_body_html for marker in _CHUD_SKIP_MARKERS):
        soup, body = _chud_body(old_body_html)
        # lxml yields no <html>/<body> for input with no content; in that
        # case there is nothing to rewrite and body_html is left as it was.
        if body is not None:
            untouched = {'blockquote', 'codeblock', 'pre'}
            # Only direct children of <body> whose content is one single
            # string are rewritten.
            tags = body.find_all(lambda tag: tag.name not in untouched and tag.string, recursive=False)
            for tag in tags:
                tag.string.replace_with(torture_chud(tag.string, author.username))
            obj.body_html = str(soup).replace('<html><body>', '').replace('</body></html>', '')

    # torture title_html, then leave if the chud phrase is in the plain title
    if isinstance(obj, Post):
        obj.title_html = torture_chud(obj.title_html, author.username)
        if not author.chud or author.chud_phrase in obj.title.lower():
            return True

    # check for chud_phrase in the original (untortured) body_html
    if old_body_html:
        excluded_tags = {'del', 'sub', 'sup', 'marquee', 'spoiler', 'lite-youtube', 'video', 'audio'}
        _, body = _chud_body(old_body_html)
        if body is not None:
            # Only attribute-less direct children of <body> count, and only
            # their own text nodes (not text nested in child tags).
            tags = body.find_all(lambda tag: tag.name not in excluded_tags and not tag.attrs, recursive=False)
            for tag in tags:
                for text in tag.find_all(text=True, recursive=False):
                    if not author.chud or author.chud_phrase in text.lower():
                        return True

    return False