forked from rDrama/rDrama

prevent retards from doxing themselves through tiktok links - post edition

master
Aevann 2023-10-06 22:10:06 +03:00
parent d1108f1fad
commit 1fd6295e8c
4 changed files with 97 additions and 100 deletions
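What the change does: reddit /s/ and tiktok.com/t/ share links redirect to the canonical page, and the short slug they carry can be tied to the account that generated the link. Resolving such links on the server before the post is stored keeps that token out of the saved URL and body. A minimal standalone sketch of the cleaning step, using only the requests library (the real helper in this commit also applies the repo's normalize_url(), HEADERS and proxies; the example link is invented):

import requests

def resolve_share_link(url, timeout=2):
	# Follow the share link's redirect chain and keep only the final,
	# canonical URL, dropping the per-user share slug from the path.
	try:
		return requests.get(url, timeout=timeout).url
	except requests.RequestException:
		# On any network error, keep the original URL unchanged.
		return url

# resolve_share_link("https://tiktok.com/t/ZTAbc1234")  # hypothetical link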

View File

@@ -172,7 +172,7 @@ def process_video(file, v):
 	else:
 		return f"{SITE_FULL}{new}"
 
-def process_image(filename, v, resize=0, trim=False, uploader_id=None, db=None):
+def process_image(filename, v, resize=0, trim=False, uploader_id=None):
 	# thumbnails are processed in a thread and not in the request context
 	# if an image is too large or webp conversion fails, it'll crash
 	# to avoid this, we'll simply return None instead
@@ -248,10 +248,8 @@ def process_image(filename, v, resize=0, trim=False, uploader_id=None, db=None):
 		os.remove(filename)
 		return None
 
-	db = db or g.db
-
-	media = db.query(Media).filter_by(filename=filename, kind='image').one_or_none()
-	if media: db.delete(media)
+	media = g.db.query(Media).filter_by(filename=filename, kind='image').one_or_none()
+	if media: g.db.delete(media)
 
 	media = Media(
 		kind='image',
@@ -259,7 +257,7 @@ def process_image(filename, v, resize=0, trim=False, uploader_id=None, db=None):
 		user_id=uploader_id or v.id,
 		size=os.stat(filename).st_size
 	)
-	db.add(media)
+	g.db.add(media)
 
 	if SITE == 'watchpeopledie.tv' and v and "dylan" in v.username.lower() and "hewitt" in v.username.lower():
 		gevent.spawn(delete_file, filename)

View File

@@ -187,6 +187,7 @@ reddit_comment_link_regex = re.compile("https:\/\/old.reddit.com\/r\/\w{2,25}\/c
 #gevent
 reddit_s_url_regex = re.compile("https:\/\/reddit.com\/[ru]\/\w{2,25}\/s\/\w{10}", flags=re.A)
+tiktok_t_url_regex = re.compile("https:\/\/tiktok.com\/t\/\w{9}", flags=re.A)
 
 #run-time
 reddit_to_vreddit_regex = re.compile('(^|>|")https:\/\/old.reddit.com\/([ru])\/', flags=re.A)
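A quick illustration of what the new pattern accepts (the example links are invented; \w{9} means exactly nine word characters after /t/, and flags=re.A restricts \w to ASCII):

import re

tiktok_t_url_regex = re.compile("https:\/\/tiktok.com\/t\/\w{9}", flags=re.A)

print(bool(tiktok_t_url_regex.fullmatch("https://tiktok.com/t/ZTAbc1234")))    # True
print(bool(tiktok_t_url_regex.fullmatch("https://tiktok.com/@user/video/1")))  # False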

View File

@@ -423,7 +423,9 @@ def sanitize(sanitized, golden=True, limit_pings=0, showmore=False, count_emojis
 	if not sanitized: return ''
 
-	if blackjack and execute_blackjack(g.v, None, sanitized, blackjack):
+	v = getattr(g, 'v', None)
+
+	if blackjack and execute_blackjack(v, None, sanitized, blackjack):
 		sanitized = 'g'
 
 	if '```' not in sanitized and '<pre>' not in sanitized:
@@ -450,8 +452,6 @@ def sanitize(sanitized, golden=True, limit_pings=0, showmore=False, count_emojis
 	sanitized = reddit_mention_regex.sub(r'<a href="https://old.reddit.com/\1" rel="nofollow noopener" target="_blank">/\1</a>', sanitized)
 	sanitized = hole_mention_regex.sub(r'<a href="/\1">/\1</a>', sanitized)
 
-	v = getattr(g, 'v', None)
-
 	names = set(m.group(1) for m in mention_regex.finditer(sanitized))
 	if limit_pings and len(names) > limit_pings and v.admin_level < PERMS['POST_COMMENT_INFINITE_PINGS']:
@@ -485,7 +485,7 @@ def sanitize(sanitized, golden=True, limit_pings=0, showmore=False, count_emojis
 		elif name == 'commenters' and commenters_ping_post_id:
 			return f'<a href="/!commenters/{commenters_ping_post_id}/{int(time.time())}">!{name}</a>'
 		elif name == 'followers':
-			return f'<a href="/id/{g.v.id}/followers">!{name}</a>'
+			return f'<a href="/id/{v.id}/followers">!{name}</a>'
 		elif g.db.get(Group, name):
 			return f'<a href="/!{name}">!{name}</a>'
 		else:
@@ -572,7 +572,7 @@ def sanitize(sanitized, golden=True, limit_pings=0, showmore=False, count_emojis
 	sanitized = sanitized.replace('<p></p>', '')
 
 	allowed_css_properties = allowed_styles.copy()
-	if g.v and g.v.chud:
+	if v and v.chud:
 		allowed_css_properties.remove('filter')
 	css_sanitizer = CSSSanitizer(allowed_css_properties=allowed_css_properties)
@@ -591,7 +591,7 @@ def sanitize(sanitized, golden=True, limit_pings=0, showmore=False, count_emojis
 	links = soup.find_all("a")
 
-	if g.v and g.v.admin_level >= PERMS["IGNORE_DOMAIN_BAN"]:
+	if v and v.admin_level >= PERMS["IGNORE_DOMAIN_BAN"]:
 		banned_domains = []
 	else:
 		banned_domains = [x.domain for x in g.db.query(BannedDomain.domain)]
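Because sanitize() can now be reached from the background post-processing greenlet, g.v may simply not be set; getattr(g, 'v', None) turns that into None so the later checks fall back to the logged-out path instead of raising AttributeError. A small sketch of the pattern in plain Flask (the admin_level check is illustrative, not this codebase's PERMS lookup):

from flask import Flask, g

app = Flask(__name__)

with app.app_context():
	# g.v is only populated by the request machinery, so background work
	# sees no such attribute at all; getattr() degrades that to None.
	v = getattr(g, 'v', None)
	is_admin = bool(v) and v.admin_level >= 1  # short-circuits safely when v is None
	print(v, is_admin)  # None False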

View File

@@ -287,118 +287,119 @@ def expand_url(post_url, fragment_url):
 	return f"{post_url}/{fragment_url}"
 
-def reddit_s_url_cleaner(url):
-	return normalize_url(requests.get(url, headers=HEADERS, timeout=2, proxies=proxies).url)
+def cancer_url_cleaner(url):
+	try: url = requests.get(url, headers=HEADERS, timeout=2, proxies=proxies).url
+	except: return url
+	return normalize_url(url)
 
-def surl_and_thumbnail_thread(post_url, post_body, post_body_html, pid, generate_thumb):
-	#s_url
-	dirty = False
-
-	if post_url and reddit_s_url_regex.fullmatch(post_url):
-		post_url = reddit_s_url_cleaner(post_url)
-		dirty = True
-
-	if post_body:
-		for i in reddit_s_url_regex.finditer(post_body):
-			old = i.group(0)
-			new = reddit_s_url_cleaner(old)
-			post_body = post_body.replace(old, new)
-			post_body_html = post_body_html.replace(old, new)
-			dirty = True
-
-	if dirty:
-		db = db_session()
-		p = db.query(Post).filter_by(id=pid).options(load_only(Post.id)).one_or_none()
+def postprocess_post(post_url, post_body, post_body_html, pid, generate_thumb, edit):
+	with app.app_context():
+		if post_url and (reddit_s_url_regex.fullmatch(post_url) or tiktok_t_url_regex.fullmatch(post_url)):
+			post_url = cancer_url_cleaner(post_url)
+
+		if post_body:
+			li = list(reddit_s_url_regex.finditer(post_body)) + list(tiktok_t_url_regex.finditer(post_body))
+			for i in li:
+				old = i.group(0)
+				new = cancer_url_cleaner(old)
+				post_body = post_body.replace(old, new)
+				post_body_html = post_body_html.replace(old, new)
+
+		g.db = db_session()
+
+		p = g.db.query(Post).filter_by(id=pid).options(load_only(Post.id)).one_or_none()
 		p.url = post_url
 		p.body = post_body
 		p.body_html = post_body_html
+		g.db.add(p)
 
-		db.add(p)
-		db.commit()
-		db.close()
-		stdout.flush()
+		if not p.private and not edit:
+			execute_snappy(p, p.author)
+
+		g.db.commit()
+		g.db.close()
+		stdout.flush()
 
 		#thumbnail
 		if not generate_thumb: return
 
 		if post_url.startswith('/') and '\\' not in post_url:
 			post_url = f"{SITE_FULL}{post_url}"
 
 		try:
 			x = requests.get(post_url, headers=HEADERS, timeout=5, proxies=proxies)
 		except:
 			return
 
 		if x.status_code != 200:
 			return
 
 		if x.headers.get("Content-Type","").startswith("text/html"):
 			soup = BeautifulSoup(x.content, 'lxml')
 			thumb_candidate_urls = []
 			for tag_name in ("twitter:image", "og:image", "thumbnail"):
 				tag = soup.find('meta', attrs={"name": tag_name, "content": True})
 				if not tag:
 					tag = soup.find('meta', attrs={"property": tag_name, "content": True})
 				if tag:
 					thumb_candidate_urls.append(expand_url(post_url, tag['content']))
 			for tag in soup.find_all("img", attrs={'src': True}):
 				thumb_candidate_urls.append(expand_url(post_url, tag['src']))
 			for url in thumb_candidate_urls:
 				try:
 					image_req = requests.get(url, headers=HEADERS, timeout=5, proxies=proxies)
 				except:
 					continue
 				if image_req.status_code >= 400:
 					continue
 				if not image_req.headers.get("Content-Type","").startswith("image/"):
 					continue
 				if image_req.headers.get("Content-Type","").startswith("image/svg"):
 					continue
 				with Image.open(BytesIO(image_req.content)) as i:
 					if i.width < 30 or i.height < 30:
 						continue
 				break
 			else:
 				return
 		elif x.headers.get("Content-Type","").startswith("image/"):
 			image_req = x
 			with Image.open(BytesIO(x.content)) as i:
 				size = len(i.fp.read())
 				if size > 8 * 1024 * 1024:
 					return
 		else:
 			return
 
 		name = f'/images/{time.time()}'.replace('.','') + '.webp'
 		with open(name, "wb") as file:
 			for chunk in image_req.iter_content(1024):
 				file.write(chunk)
 
-	db = db_session()
-	p = db.query(Post).filter_by(id=pid).options(load_only(Post.author_id)).one_or_none()
-	thumburl = process_image(name, None, resize=99, uploader_id=p.author_id, db=db)
+		g.db = db_session()
+		p = g.db.query(Post).filter_by(id=pid).options(load_only(Post.author_id)).one_or_none()
+		thumburl = process_image(name, None, resize=99, uploader_id=p.author_id)
 		if thumburl:
 			p.thumburl = thumburl
-			db.add(p)
-			db.commit()
-	db.close()
+			g.db.add(p)
+			g.db.commit()
+		g.db.close()
 		stdout.flush()
 
 @app.post("/is_repost")
@@ -698,13 +699,10 @@ def submit_post(v, sub=None):
 	cache.delete_memoized(frontlist)
 	cache.delete_memoized(userpagelisting)
 
-	if not p.private:
-		execute_snappy(p, v)
-
 	g.db.flush() #Necessary, do NOT remove
 	generate_thumb = (not p.thumburl and p.url and p.domain != SITE)
-	gevent.spawn(surl_and_thumbnail_thread, p.url, p.body, p.body_html, p.id, generate_thumb)
+	gevent.spawn(postprocess_post, p.url, p.body, p.body_html, p.id, generate_thumb, False)
 
 	if v.client: return p.json
 	else:
@@ -1054,7 +1052,7 @@ def edit_post(pid, v):
 	process_poll_options(v, p)
 
-	gevent.spawn(surl_and_thumbnail_thread, p.url, p.body, p.body_html, p.id, False)
+	gevent.spawn(postprocess_post, p.url, p.body, p.body_html, p.id, False, True)
 
 	if not complies_with_chud(p):
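For reference, the shape of the background job that submit_post and edit_post now spawn: the greenlet has no request context, so it opens its own app context, binds a fresh session to g.db, and commits and closes that session itself. A minimal sketch with illustrative names (Session and background_job are stand-ins, not the repo's helpers):

import gevent
from flask import Flask, g
from sqlalchemy.orm import sessionmaker

app = Flask(__name__)
Session = sessionmaker()  # assumed to be bound to the site's engine elsewhere

def background_job(post_id):
	# No request context here, so build an app context and a fresh session.
	with app.app_context():
		g.db = Session()
		try:
			# load the post by post_id, rewrite share links, set the thumbnail...
			g.db.commit()
		finally:
			g.db.close()

gevent.spawn(background_job, 123)  # fire-and-forget, mirroring the routes above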