prevent retards from doxing themselves through tiktok links - post edition

pull/215/head
Aevann 2023-10-06 22:10:06 +03:00
parent d1108f1fad
commit 1fd6295e8c
4 changed files with 97 additions and 100 deletions

@@ -172,7 +172,7 @@ def process_video(file, v):
     else:
         return f"{SITE_FULL}{new}"
 
-def process_image(filename, v, resize=0, trim=False, uploader_id=None, db=None):
+def process_image(filename, v, resize=0, trim=False, uploader_id=None):
     # thumbnails are processed in a thread and not in the request context
     # if an image is too large or webp conversion fails, it'll crash
     # to avoid this, we'll simply return None instead
@@ -248,10 +248,8 @@ def process_image(filename, v, resize=0, trim=False, uploader_id=None, db=None):
         os.remove(filename)
         return None
 
-    db = db or g.db
-
-    media = db.query(Media).filter_by(filename=filename, kind='image').one_or_none()
-    if media: db.delete(media)
+    media = g.db.query(Media).filter_by(filename=filename, kind='image').one_or_none()
+    if media: g.db.delete(media)
 
     media = Media(
         kind='image',
@@ -259,7 +257,7 @@ def process_image(filename, v, resize=0, trim=False, uploader_id=None, db=None):
         user_id=uploader_id or v.id,
         size=os.stat(filename).st_size
     )
-    db.add(media)
+    g.db.add(media)
 
     if SITE == 'watchpeopledie.tv' and v and "dylan" in v.username.lower() and "hewitt" in v.username.lower():
        gevent.spawn(delete_file, filename)
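
The comments above explain the failure mode: thumbnails are generated in a background thread, so a corrupt or oversized upload must not raise. A minimal standalone sketch of that return-None guard, using plain Pillow (safe_webp_convert is illustrative, not the site's actual process_image):

from PIL import Image, UnidentifiedImageError

def safe_webp_convert(path):
    # illustrative guard: convert to webp, but return None instead of
    # crashing on unreadable images or failed conversions
    try:
        with Image.open(path) as img:
            out = path.rsplit('.', 1)[0] + '.webp'
            img.save(out, 'WEBP')
            return out
    except (OSError, UnidentifiedImageError):
        return None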

@@ -187,6 +187,7 @@ reddit_comment_link_regex = re.compile("https:\/\/old.reddit.com\/r\/\w{2,25}\/c
 
 #gevent
 reddit_s_url_regex = re.compile("https:\/\/reddit.com\/[ru]\/\w{2,25}\/s\/\w{10}", flags=re.A)
+tiktok_t_url_regex = re.compile("https:\/\/tiktok.com\/t\/\w{9}", flags=re.A)
 
 #run-time
 reddit_to_vreddit_regex = re.compile('(^|>|")https:\/\/old.reddit.com\/([ru])\/', flags=re.A)
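
For reference, what the two share-link patterns accept and reject; the patterns are copied verbatim from the hunk above, and the URLs below are fabricated examples, not real share links:

import re

reddit_s_url_regex = re.compile("https:\/\/reddit.com\/[ru]\/\w{2,25}\/s\/\w{10}", flags=re.A)
tiktok_t_url_regex = re.compile("https:\/\/tiktok.com\/t\/\w{9}", flags=re.A)

print(bool(reddit_s_url_regex.fullmatch("https://reddit.com/r/example/s/AbCdEfGhIj")))  # True
print(bool(tiktok_t_url_regex.fullmatch("https://tiktok.com/t/ZTAbCdEfG")))             # True
print(bool(tiktok_t_url_regex.fullmatch("https://www.tiktok.com/@someone/video/1")))    # False, already a full video URL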

@@ -423,7 +423,9 @@ def sanitize(sanitized, golden=True, limit_pings=0, showmore=False, count_emojis
     if not sanitized: return ''
 
-    if blackjack and execute_blackjack(g.v, None, sanitized, blackjack):
+    v = getattr(g, 'v', None)
+
+    if blackjack and execute_blackjack(v, None, sanitized, blackjack):
         sanitized = 'g'
 
     if '```' not in sanitized and '<pre>' not in sanitized:
@@ -450,8 +452,6 @@ def sanitize(sanitized, golden=True, limit_pings=0, showmore=False, count_emojis
     sanitized = reddit_mention_regex.sub(r'<a href="https://old.reddit.com/\1" rel="nofollow noopener" target="_blank">/\1</a>', sanitized)
     sanitized = hole_mention_regex.sub(r'<a href="/\1">/\1</a>', sanitized)
 
-    v = getattr(g, 'v', None)
-
     names = set(m.group(1) for m in mention_regex.finditer(sanitized))
     if limit_pings and len(names) > limit_pings and v.admin_level < PERMS['POST_COMMENT_INFINITE_PINGS']:
@@ -485,7 +485,7 @@ def sanitize(sanitized, golden=True, limit_pings=0, showmore=False, count_emojis
         elif name == 'commenters' and commenters_ping_post_id:
             return f'<a href="/!commenters/{commenters_ping_post_id}/{int(time.time())}">!{name}</a>'
         elif name == 'followers':
-            return f'<a href="/id/{g.v.id}/followers">!{name}</a>'
+            return f'<a href="/id/{v.id}/followers">!{name}</a>'
         elif g.db.get(Group, name):
             return f'<a href="/!{name}">!{name}</a>'
         else:
@@ -572,7 +572,7 @@ def sanitize(sanitized, golden=True, limit_pings=0, showmore=False, count_emojis
     sanitized = sanitized.replace('<p></p>', '')
 
     allowed_css_properties = allowed_styles.copy()
-    if g.v and g.v.chud:
+    if v and v.chud:
         allowed_css_properties.remove('filter')
 
     css_sanitizer = CSSSanitizer(allowed_css_properties=allowed_css_properties)
@@ -591,7 +591,7 @@ def sanitize(sanitized, golden=True, limit_pings=0, showmore=False, count_emojis
     links = soup.find_all("a")
 
-    if g.v and g.v.admin_level >= PERMS["IGNORE_DOMAIN_BAN"]:
+    if v and v.admin_level >= PERMS["IGNORE_DOMAIN_BAN"]:
         banned_domains = []
     else:
         banned_domains = [x.domain for x in g.db.query(BannedDomain.domain)]
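
The g.v to getattr(g, 'v', None) switch matters because sanitize can now also run inside the spawned post-processing job, where an application context exists but no request ever attached a viewer to g. A small sketch of the difference, assuming only Flask (the attribute name v stands in for whatever the site sets during a request):

from flask import Flask, g

app = Flask(__name__)

with app.app_context():
    # g.v                      # would raise AttributeError: nothing set it here
    v = getattr(g, 'v', None)  # falls back to None instead
    print(v)                   # -> None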

@@ -287,118 +287,119 @@ def expand_url(post_url, fragment_url):
         return f"{post_url}/{fragment_url}"
 
-def reddit_s_url_cleaner(url):
-    return normalize_url(requests.get(url, headers=HEADERS, timeout=2, proxies=proxies).url)
+def cancer_url_cleaner(url):
+    try: url = requests.get(url, headers=HEADERS, timeout=2, proxies=proxies).url
+    except: return url
+    return normalize_url(url)
 
-def surl_and_thumbnail_thread(post_url, post_body, post_body_html, pid, generate_thumb):
-    #s_url
-    dirty = False
+def postprocess_post(post_url, post_body, post_body_html, pid, generate_thumb, edit):
+    with app.app_context():
+        if post_url and (reddit_s_url_regex.fullmatch(post_url) or tiktok_t_url_regex.fullmatch(post_url)):
+            post_url = cancer_url_cleaner(post_url)
 
-    if post_url and reddit_s_url_regex.fullmatch(post_url):
-        post_url = reddit_s_url_cleaner(post_url)
-        dirty = True
+        if post_body:
+            li = list(reddit_s_url_regex.finditer(post_body)) + list(tiktok_t_url_regex.finditer(post_body))
+            for i in li:
+                old = i.group(0)
+                new = cancer_url_cleaner(old)
+                post_body = post_body.replace(old, new)
+                post_body_html = post_body_html.replace(old, new)
 
-    if post_body:
-        for i in reddit_s_url_regex.finditer(post_body):
-            old = i.group(0)
-            new = reddit_s_url_cleaner(old)
-            post_body = post_body.replace(old, new)
-            post_body_html = post_body_html.replace(old, new)
-            dirty = True
+        g.db = db_session()
 
-    if dirty:
-        db = db_session()
-        p = db.query(Post).filter_by(id=pid).options(load_only(Post.id)).one_or_none()
+        p = g.db.query(Post).filter_by(id=pid).options(load_only(Post.id)).one_or_none()
         p.url = post_url
         p.body = post_body
         p.body_html = post_body_html
-        db.add(p)
-        db.commit()
-        db.close()
-        stdout.flush()
+        g.db.add(p)
+
+        if not p.private and not edit:
+            execute_snappy(p, p.author)
+
+        g.db.commit()
+        g.db.close()
+        stdout.flush()
 
-    #thumbnail
-    if not generate_thumb: return
+        #thumbnail
+        if not generate_thumb: return
 
-    if post_url.startswith('/') and '\\' not in post_url:
-        post_url = f"{SITE_FULL}{post_url}"
+        if post_url.startswith('/') and '\\' not in post_url:
+            post_url = f"{SITE_FULL}{post_url}"
 
-    try:
-        x = requests.get(post_url, headers=HEADERS, timeout=5, proxies=proxies)
-    except:
-        return
+        try:
+            x = requests.get(post_url, headers=HEADERS, timeout=5, proxies=proxies)
+        except:
+            return
 
-    if x.status_code != 200:
-        return
+        if x.status_code != 200:
+            return
 
-    if x.headers.get("Content-Type","").startswith("text/html"):
-        soup = BeautifulSoup(x.content, 'lxml')
+        if x.headers.get("Content-Type","").startswith("text/html"):
+            soup = BeautifulSoup(x.content, 'lxml')
 
-        thumb_candidate_urls = []
+            thumb_candidate_urls = []
 
-        for tag_name in ("twitter:image", "og:image", "thumbnail"):
-            tag = soup.find('meta', attrs={"name": tag_name, "content": True})
-            if not tag:
-                tag = soup.find('meta', attrs={"property": tag_name, "content": True})
-            if tag:
-                thumb_candidate_urls.append(expand_url(post_url, tag['content']))
+            for tag_name in ("twitter:image", "og:image", "thumbnail"):
+                tag = soup.find('meta', attrs={"name": tag_name, "content": True})
+                if not tag:
+                    tag = soup.find('meta', attrs={"property": tag_name, "content": True})
+                if tag:
+                    thumb_candidate_urls.append(expand_url(post_url, tag['content']))
 
-        for tag in soup.find_all("img", attrs={'src': True}):
-            thumb_candidate_urls.append(expand_url(post_url, tag['src']))
+            for tag in soup.find_all("img", attrs={'src': True}):
+                thumb_candidate_urls.append(expand_url(post_url, tag['src']))
 
-        for url in thumb_candidate_urls:
-            try:
-                image_req = requests.get(url, headers=HEADERS, timeout=5, proxies=proxies)
-            except:
-                continue
-            if image_req.status_code >= 400:
-                continue
-            if not image_req.headers.get("Content-Type","").startswith("image/"):
-                continue
-            if image_req.headers.get("Content-Type","").startswith("image/svg"):
-                continue
-            with Image.open(BytesIO(image_req.content)) as i:
-                if i.width < 30 or i.height < 30:
-                    continue
-            break
-        else:
-            return
-    elif x.headers.get("Content-Type","").startswith("image/"):
-        image_req = x
-        with Image.open(BytesIO(x.content)) as i:
-            size = len(i.fp.read())
-            if size > 8 * 1024 * 1024:
-                return
-    else:
-        return
+            for url in thumb_candidate_urls:
+                try:
+                    image_req = requests.get(url, headers=HEADERS, timeout=5, proxies=proxies)
+                except:
+                    continue
+                if image_req.status_code >= 400:
+                    continue
+                if not image_req.headers.get("Content-Type","").startswith("image/"):
+                    continue
+                if image_req.headers.get("Content-Type","").startswith("image/svg"):
+                    continue
+                with Image.open(BytesIO(image_req.content)) as i:
+                    if i.width < 30 or i.height < 30:
+                        continue
+                break
+            else:
+                return
+        elif x.headers.get("Content-Type","").startswith("image/"):
+            image_req = x
+            with Image.open(BytesIO(x.content)) as i:
+                size = len(i.fp.read())
+                if size > 8 * 1024 * 1024:
+                    return
+        else:
+            return
 
-    name = f'/images/{time.time()}'.replace('.','') + '.webp'
+        name = f'/images/{time.time()}'.replace('.','') + '.webp'
 
-    with open(name, "wb") as file:
-        for chunk in image_req.iter_content(1024):
-            file.write(chunk)
+        with open(name, "wb") as file:
+            for chunk in image_req.iter_content(1024):
+                file.write(chunk)
 
-    db = db_session()
-    p = db.query(Post).filter_by(id=pid).options(load_only(Post.author_id)).one_or_none()
-    thumburl = process_image(name, None, resize=99, uploader_id=p.author_id, db=db)
-    if thumburl:
-        p.thumburl = thumburl
-        db.add(p)
-    db.commit()
-    db.close()
-    stdout.flush()
+        g.db = db_session()
+        p = g.db.query(Post).filter_by(id=pid).options(load_only(Post.author_id)).one_or_none()
+        thumburl = process_image(name, None, resize=99, uploader_id=p.author_id)
+        if thumburl:
+            p.thumburl = thumburl
+            g.db.add(p)
+        g.db.commit()
+        g.db.close()
+        stdout.flush()
 
 @app.post("/is_repost")
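
The cleanup itself amounts to following the short link's redirect and storing the final URL. A rough standalone equivalent of cancer_url_cleaner (resolve_share_url is an illustrative name, and stripping the query string is only a crude stand-in for normalize_url):

import requests

def resolve_share_url(url, timeout=2):
    # follow the reddit /s/ or tiktok /t/ redirect; on any failure keep the original URL
    try:
        final_url = requests.get(url, timeout=timeout, allow_redirects=True).url
    except requests.RequestException:
        return url
    # crude stand-in for normalize_url: drop tracking query parameters
    return final_url.split('?', 1)[0]

Because execute_snappy now runs in the same job after this cleaning step, the bot's snapshot presumably sees the resolved URL rather than the tracking short link.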
@@ -698,13 +699,10 @@ def submit_post(v, sub=None):
     cache.delete_memoized(frontlist)
     cache.delete_memoized(userpagelisting)
 
-    if not p.private:
-        execute_snappy(p, v)
 
-    g.db.flush() #Necessary, do NOT remove
 
     generate_thumb = (not p.thumburl and p.url and p.domain != SITE)
-    gevent.spawn(surl_and_thumbnail_thread, p.url, p.body, p.body_html, p.id, generate_thumb)
+    gevent.spawn(postprocess_post, p.url, p.body, p.body_html, p.id, generate_thumb, False)
 
     if v.client: return p.json
     else:
@@ -1054,7 +1052,7 @@ def edit_post(pid, v):
     process_poll_options(v, p)
 
-    gevent.spawn(surl_and_thumbnail_thread, p.url, p.body, p.body_html, p.id, False)
+    gevent.spawn(postprocess_post, p.url, p.body, p.body_html, p.id, False, True)
 
     if not complies_with_chud(p):