forked from rDrama/rDrama

prevent retards from doxing themselves through tiktok links - post edition

master
Aevann 2023-10-06 22:10:06 +03:00
parent d1108f1fad
commit 1fd6295e8c
4 changed files with 97 additions and 100 deletions
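What the change does: reddit /s/ and tiktok.com/t/ share links redirect to the canonical page, and the short slug they carry can be tied to the account that generated the link. Resolving such links on the server before the post is stored keeps that token out of the saved URL and body. A minimal standalone sketch of the cleaning step, using only the requests library (the real helper in this commit also applies the repo's normalize_url(), HEADERS and proxies; the example link is invented):

import requests

def resolve_share_link(url, timeout=2):
	# Follow the share link's redirect chain and keep only the final,
	# canonical URL, dropping the per-user share slug from the path.
	try:
		return requests.get(url, timeout=timeout).url
	except requests.RequestException:
		# On any network error, keep the original URL unchanged.
		return url

# resolve_share_link("https://tiktok.com/t/ZTAbc1234")  # hypothetical link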

View File

@@ -172,7 +172,7 @@ def process_video(file, v):
 	else:
 		return f"{SITE_FULL}{new}"
 
-def process_image(filename, v, resize=0, trim=False, uploader_id=None, db=None):
+def process_image(filename, v, resize=0, trim=False, uploader_id=None):
 	# thumbnails are processed in a thread and not in the request context
 	# if an image is too large or webp conversion fails, it'll crash
 	# to avoid this, we'll simply return None instead
@@ -248,10 +248,8 @@ def process_image(filename, v, resize=0, trim=False, uploader_id=None, db=None):
 		os.remove(filename)
 		return None
 
-	db = db or g.db
-
-	media = db.query(Media).filter_by(filename=filename, kind='image').one_or_none()
-	if media: db.delete(media)
+	media = g.db.query(Media).filter_by(filename=filename, kind='image').one_or_none()
+	if media: g.db.delete(media)
 
 	media = Media(
 		kind='image',
@@ -259,7 +257,7 @@ def process_image(filename, v, resize=0, trim=False, uploader_id=None, db=None):
 		user_id=uploader_id or v.id,
 		size=os.stat(filename).st_size
 	)
-	db.add(media)
+	g.db.add(media)
 
 	if SITE == 'watchpeopledie.tv' and v and "dylan" in v.username.lower() and "hewitt" in v.username.lower():
 		gevent.spawn(delete_file, filename)

View File

@@ -187,6 +187,7 @@ reddit_comment_link_regex = re.compile("https:\/\/old.reddit.com\/r\/\w{2,25}\/c
 #gevent
 reddit_s_url_regex = re.compile("https:\/\/reddit.com\/[ru]\/\w{2,25}\/s\/\w{10}", flags=re.A)
+tiktok_t_url_regex = re.compile("https:\/\/tiktok.com\/t\/\w{9}", flags=re.A)
 
 #run-time
 reddit_to_vreddit_regex = re.compile('(^|>|")https:\/\/old.reddit.com\/([ru])\/', flags=re.A)
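A quick illustration of what the new pattern accepts (the example links are invented; \w{9} means exactly nine word characters after /t/, and flags=re.A restricts \w to ASCII):

import re

tiktok_t_url_regex = re.compile("https:\/\/tiktok.com\/t\/\w{9}", flags=re.A)

print(bool(tiktok_t_url_regex.fullmatch("https://tiktok.com/t/ZTAbc1234")))    # True
print(bool(tiktok_t_url_regex.fullmatch("https://tiktok.com/@user/video/1")))  # False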

View File

@@ -423,7 +423,9 @@ def sanitize(sanitized, golden=True, limit_pings=0, showmore=False, count_emojis
 	if not sanitized: return ''
 
-	if blackjack and execute_blackjack(g.v, None, sanitized, blackjack):
+	v = getattr(g, 'v', None)
+
+	if blackjack and execute_blackjack(v, None, sanitized, blackjack):
 		sanitized = 'g'
 
 	if '```' not in sanitized and '<pre>' not in sanitized:
@@ -450,8 +452,6 @@ def sanitize(sanitized, golden=True, limit_pings=0, showmore=False, count_emojis
 	sanitized = reddit_mention_regex.sub(r'<a href="https://old.reddit.com/\1" rel="nofollow noopener" target="_blank">/\1</a>', sanitized)
 	sanitized = hole_mention_regex.sub(r'<a href="/\1">/\1</a>', sanitized)
 
-	v = getattr(g, 'v', None)
-
 	names = set(m.group(1) for m in mention_regex.finditer(sanitized))
 	if limit_pings and len(names) > limit_pings and v.admin_level < PERMS['POST_COMMENT_INFINITE_PINGS']:
@@ -485,7 +485,7 @@ def sanitize(sanitized, golden=True, limit_pings=0, showmore=False, count_emojis
 		elif name == 'commenters' and commenters_ping_post_id:
 			return f'<a href="/!commenters/{commenters_ping_post_id}/{int(time.time())}">!{name}</a>'
 		elif name == 'followers':
-			return f'<a href="/id/{g.v.id}/followers">!{name}</a>'
+			return f'<a href="/id/{v.id}/followers">!{name}</a>'
 		elif g.db.get(Group, name):
 			return f'<a href="/!{name}">!{name}</a>'
 		else:
@@ -572,7 +572,7 @@ def sanitize(sanitized, golden=True, limit_pings=0, showmore=False, count_emojis
 	sanitized = sanitized.replace('<p></p>', '')
 
 	allowed_css_properties = allowed_styles.copy()
-	if g.v and g.v.chud:
+	if v and v.chud:
 		allowed_css_properties.remove('filter')
 	css_sanitizer = CSSSanitizer(allowed_css_properties=allowed_css_properties)
@@ -591,7 +591,7 @@ def sanitize(sanitized, golden=True, limit_pings=0, showmore=False, count_emojis
 	links = soup.find_all("a")
 
-	if g.v and g.v.admin_level >= PERMS["IGNORE_DOMAIN_BAN"]:
+	if v and v.admin_level >= PERMS["IGNORE_DOMAIN_BAN"]:
 		banned_domains = []
 	else:
 		banned_domains = [x.domain for x in g.db.query(BannedDomain.domain)]
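Because sanitize() can now be reached from the background post-processing greenlet, g.v may simply not be set; getattr(g, 'v', None) turns that into None so the later checks fall back to the logged-out path instead of raising AttributeError. A small sketch of the pattern in plain Flask (the admin_level check is illustrative, not this codebase's PERMS lookup):

from flask import Flask, g

app = Flask(__name__)

with app.app_context():
	# g.v is only populated by the request machinery, so background work
	# sees no such attribute at all; getattr() degrades that to None.
	v = getattr(g, 'v', None)
	is_admin = bool(v) and v.admin_level >= 1  # short-circuits safely when v is None
	print(v, is_admin)  # None False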

View File

@@ -287,118 +287,119 @@ def expand_url(post_url, fragment_url):
 	return f"{post_url}/{fragment_url}"
 
-def reddit_s_url_cleaner(url):
-	return normalize_url(requests.get(url, headers=HEADERS, timeout=2, proxies=proxies).url)
+def cancer_url_cleaner(url):
+	try: url = requests.get(url, headers=HEADERS, timeout=2, proxies=proxies).url
+	except: return url
+	return normalize_url(url)
 
-def surl_and_thumbnail_thread(post_url, post_body, post_body_html, pid, generate_thumb):
-	#s_url
-	dirty = False
-
-	if post_url and reddit_s_url_regex.fullmatch(post_url):
-		post_url = reddit_s_url_cleaner(post_url)
-		dirty = True
-
-	if post_body:
-		for i in reddit_s_url_regex.finditer(post_body):
-			old = i.group(0)
-			new = reddit_s_url_cleaner(old)
-			post_body = post_body.replace(old, new)
-			post_body_html = post_body_html.replace(old, new)
-			dirty = True
-
-	if dirty:
-		db = db_session()
-		p = db.query(Post).filter_by(id=pid).options(load_only(Post.id)).one_or_none()
+def postprocess_post(post_url, post_body, post_body_html, pid, generate_thumb, edit):
+	with app.app_context():
+		if post_url and (reddit_s_url_regex.fullmatch(post_url) or tiktok_t_url_regex.fullmatch(post_url)):
+			post_url = cancer_url_cleaner(post_url)
+
+		if post_body:
+			li = list(reddit_s_url_regex.finditer(post_body)) + list(tiktok_t_url_regex.finditer(post_body))
+			for i in li:
+				old = i.group(0)
+				new = cancer_url_cleaner(old)
+				post_body = post_body.replace(old, new)
+				post_body_html = post_body_html.replace(old, new)
+
+		g.db = db_session()
+
+		p = g.db.query(Post).filter_by(id=pid).options(load_only(Post.id)).one_or_none()
 		p.url = post_url
 		p.body = post_body
 		p.body_html = post_body_html
+		g.db.add(p)
 
-		db.add(p)
-		db.commit()
-		db.close()
-		stdout.flush()
+		if not p.private and not edit:
+			execute_snappy(p, p.author)
+
+		g.db.commit()
+		g.db.close()
+		stdout.flush()
 
 		#thumbnail
 		if not generate_thumb: return
 
 		if post_url.startswith('/') and '\\' not in post_url:
 			post_url = f"{SITE_FULL}{post_url}"
 
 		try:
 			x = requests.get(post_url, headers=HEADERS, timeout=5, proxies=proxies)
 		except:
 			return
 
 		if x.status_code != 200:
 			return
 
 		if x.headers.get("Content-Type","").startswith("text/html"):
 			soup = BeautifulSoup(x.content, 'lxml')
 			thumb_candidate_urls = []
 			for tag_name in ("twitter:image", "og:image", "thumbnail"):
 				tag = soup.find('meta', attrs={"name": tag_name, "content": True})
 				if not tag:
 					tag = soup.find('meta', attrs={"property": tag_name, "content": True})
 				if tag:
 					thumb_candidate_urls.append(expand_url(post_url, tag['content']))
 			for tag in soup.find_all("img", attrs={'src': True}):
 				thumb_candidate_urls.append(expand_url(post_url, tag['src']))
 			for url in thumb_candidate_urls:
 				try:
 					image_req = requests.get(url, headers=HEADERS, timeout=5, proxies=proxies)
 				except:
 					continue
 				if image_req.status_code >= 400:
 					continue
 				if not image_req.headers.get("Content-Type","").startswith("image/"):
 					continue
 				if image_req.headers.get("Content-Type","").startswith("image/svg"):
 					continue
 				with Image.open(BytesIO(image_req.content)) as i:
 					if i.width < 30 or i.height < 30:
 						continue
 				break
 			else:
 				return
 		elif x.headers.get("Content-Type","").startswith("image/"):
 			image_req = x
 			with Image.open(BytesIO(x.content)) as i:
 				size = len(i.fp.read())
 				if size > 8 * 1024 * 1024:
 					return
 		else:
 			return
 
 		name = f'/images/{time.time()}'.replace('.','') + '.webp'
 		with open(name, "wb") as file:
 			for chunk in image_req.iter_content(1024):
 				file.write(chunk)
 
-	db = db_session()
-	p = db.query(Post).filter_by(id=pid).options(load_only(Post.author_id)).one_or_none()
-	thumburl = process_image(name, None, resize=99, uploader_id=p.author_id, db=db)
+		g.db = db_session()
+		p = g.db.query(Post).filter_by(id=pid).options(load_only(Post.author_id)).one_or_none()
+		thumburl = process_image(name, None, resize=99, uploader_id=p.author_id)
 		if thumburl:
 			p.thumburl = thumburl
-			db.add(p)
-			db.commit()
-	db.close()
+			g.db.add(p)
+			g.db.commit()
+		g.db.close()
 		stdout.flush()
 
 @app.post("/is_repost")
@@ -698,13 +699,10 @@ def submit_post(v, sub=None):
 	cache.delete_memoized(frontlist)
 	cache.delete_memoized(userpagelisting)
 
-	if not p.private:
-		execute_snappy(p, v)
-
 	g.db.flush() #Necessary, do NOT remove
 	generate_thumb = (not p.thumburl and p.url and p.domain != SITE)
-	gevent.spawn(surl_and_thumbnail_thread, p.url, p.body, p.body_html, p.id, generate_thumb)
+	gevent.spawn(postprocess_post, p.url, p.body, p.body_html, p.id, generate_thumb, False)
 
 	if v.client: return p.json
 	else:
@@ -1054,7 +1052,7 @@ def edit_post(pid, v):
 	process_poll_options(v, p)
 
-	gevent.spawn(surl_and_thumbnail_thread, p.url, p.body, p.body_html, p.id, False)
+	gevent.spawn(postprocess_post, p.url, p.body, p.body_html, p.id, False, True)
 
 	if not complies_with_chud(p):
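For reference, the shape of the background job that submit_post and edit_post now spawn: the greenlet has no request context, so it opens its own app context, binds a fresh session to g.db, and commits and closes that session itself. A minimal sketch with illustrative names (Session and background_job are stand-ins, not the repo's helpers):

import gevent
from flask import Flask, g
from sqlalchemy.orm import sessionmaker

app = Flask(__name__)
Session = sessionmaker()  # assumed to be bound to the site's engine elsewhere

def background_job(post_id):
	# No request context here, so build an app context and a fresh session.
	with app.app_context():
		g.db = Session()
		try:
			# load the post by post_id, rewrite share links, set the thumbnail...
			g.db.commit()
		finally:
			g.db.close()

gevent.spawn(background_job, 123)  # fire-and-forget, mirroring the routes above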