From 9a75ddee97c94c2b0e90a8d4b315249b8a124478 Mon Sep 17 00:00:00 2001
From: Aevann1
Date: Tue, 5 Apr 2022 17:50:56 +0200
Subject: [PATCH] dogecore

---
 files/helpers/const.py    | 18 +++++++++++-------
 files/helpers/sanitize.py |  2 ++
 files/routes/posts.py     | 28 ++++++++++++++--------------
 3 files changed, 27 insertions(+), 21 deletions(-)

diff --git a/files/helpers/const.py b/files/helpers/const.py
index c437270c0..427bd702a 100644
--- a/files/helpers/const.py
+++ b/files/helpers/const.py
@@ -670,7 +670,7 @@ marsey_regex = re.compile("[a-z0-9]{1,30}", flags=re.A)
 
 tags_regex = re.compile("[a-z0-9: ]{1,200}", flags=re.A)
 
-image_regex = re.compile("(^https:\/\/[\w\-.#&/=\?@%+]+\.(png|jpg|jpeg|gif|webp|maxwidth=9999|fidelity=high)($|\s))", flags=re.I|re.M|re.A)
+image_regex = re.compile("(^https:\/\/[\w\-.#&/=\?@%+]{5,250}\.(png|jpg|jpeg|gif|webp|maxwidth=9999|fidelity=high)($|\s))", flags=re.I|re.M|re.A)
 
 valid_sub_regex = re.compile("^[a-zA-Z0-9_\-]{3,20}$", flags=re.A)
 
@@ -688,13 +688,13 @@ title_regex = re.compile("[^\w ]", flags=re.A)
 
 based_regex = re.compile("based and (.{1,20}?)(-| )pilled", flags=re.I|re.A)
 
-controversial_regex = re.compile('["> ](https:\/\/old\.reddit\.com/r/[a-zA-Z0-9_]{3,20}\/comments\/[\w\-.#&/=\?@%+]+)["< ]', flags=re.A)
+controversial_regex = re.compile('["> ](https:\/\/old\.reddit\.com/r/[a-zA-Z0-9_]{3,20}\/comments\/[\w\-.#&/=\?@%+]{5,250})["< ]', flags=re.A)
 
 fishylinks_regex = re.compile("https?://\S+", flags=re.A)
 
 spoiler_regex = re.compile('''\|\|([^/'"]+)\|\|''', flags=re.A)
-video_regex = re.compile('<p><a href="(https:\/\/[\w\-.#&/=\?@%+]+\.(mp4|webm|mov))" rel="nofollow noopener noreferrer" target="_blank">(https:\/\/[\w\-.#&/=\?@%+]+\.(mp4|webm|mov))<\/a><\/p>', flags=re.I|re.A)
-unlinked_regex = re.compile('''(^|\s|<p>|<br>)(https:\/\/[\w\-.#&/=\?@%+]+)''', flags=re.A)
+video_regex = re.compile('<p><a href="(https:\/\/[\w\-.#&/=\?@%+]{5,250}\.(mp4|webm|mov))" rel="nofollow noopener noreferrer" target="_blank">(https:\/\/[\w\-.#&/=\?@%+]{5,250}\.(mp4|webm|mov))<\/a><\/p>', flags=re.I|re.A)
+unlinked_regex = re.compile('''(^|\s|<p>|<br>)(https:\/\/[\w\-.#&/=\?@%+]{5,250})''', flags=re.A)
 imgur_regex = re.compile('(https://i\.imgur\.com/([a-z0-9]+))\.(jpg|png|jpeg|webp)(?!</code>)', flags=re.I|re.A)
 reddit_regex = re.compile('(^|\s|<p>|<br>)\/?((r|u)\/(\w|-){3,25})', flags=re.A)
 sub_regex = re.compile('(^|\s|<p>|<br>)\/?(h\/(\w|-){3,25})', flags=re.A)
@@ -708,15 +708,17 @@ emoji_regex2 = re.compile('(?<!"):([!#A-Za-z0-9]{1,30}?):', flags=re.A)
 
-snappy_url_regex = re.compile('<a href=\"(https?:\/\/[\w:~,()\-.#&\/=?@%+]+)\" rel=\"nofollow noopener noreferrer\" target=\"_blank\">([\w:~,()\-.#&\/=?@%+]+)<\/a>', flags=re.A)
+snappy_url_regex = re.compile('<a href=\"(https?:\/\/[\w:~,()\-.#&\/=?@%+]{5,250})\" rel=\"nofollow noopener noreferrer\" target=\"_blank\">([\w:~,()\-.#&\/=?@%+]{5,250})<\/a>', flags=re.A)
 
-email_regex = re.compile('([A-Za-z0-9]+[.-_])*[A-Za-z0-9]+@[A-Za-z0-9-]+(\.[A-Z|a-z]{2,})+', flags=re.A)
+email_regex = re.compile('([A-Za-z0-9]+[.-_])*[A-Za-z0-9]+@[A-Za-z0-9-]+(\.[A-Z|a-z]{2,100})+', flags=re.A)
 
-reddit_post_regex = re.compile('(https:\/\/old\.reddit\.com\/r\/\w{1,30}\/comments\/[a-z0-9]+)[\w\-.#&/=?@%+]+', flags=re.A)
+reddit_post_regex = re.compile('(https:\/\/old\.reddit\.com\/r\/\w{1,30}\/comments\/[a-z0-9]+)[\w\-.#&/=?@%+]{5,250}', flags=re.A)
 
 utm_regex = re.compile('utm_[a-z]+=[a-z0-9_]+&', flags=re.A)
 utm_regex2 = re.compile('[?&]utm_[a-z]+=[a-z0-9_]+', flags=re.A)
 
+yt_id_regex = re.compile('[A-Za-z0-9]{5,15}', flags=re.A)
+
 slur_regex = re.compile(rf"((?<=\s|>)|^)({single_words})((?=[\s<,.$]|s[\s<,.$]))", flags=re.I|re.A)
 slur_regex_upper = re.compile(rf"((?<=\s|>)|^)({single_words.upper()})((?=[\s<,.$]|S[\s<,.$]))", flags=re.A)
 torture_regex = re.compile('(^|\s)(i|me) ', flags=re.I|re.A)
@@ -744,3 +746,5 @@ def torture_ap(body, username):
 YOUTUBE_KEY = environ.get("YOUTUBE_KEY", "").strip()
 
 ADMIGGERS = (37696,37697,37749,37833,37838)
+
+proxies = {"http":"http://127.0.0.1:18080","https":"http://127.0.0.1:18080"}
\ No newline at end of file
diff --git a/files/helpers/sanitize.py b/files/helpers/sanitize.py
index ad0de4028..001e366f3 100644
--- a/files/helpers/sanitize.py
+++ b/files/helpers/sanitize.py
@@ -303,6 +303,8 @@ def sanitize(sanitized, noimages=False, alert=False, comment=False, edit=False):
 		url = i.group(1)
 		yt_id = i.group(2).split('&')[0].split('%')[0]
 
+		if not yt_id_regex.fullmatch(yt_id): continue
+
 		replacing = f'{url}'
 
 		params = parse_qs(urlparse(url.replace('&amp;','&')).query)
diff --git a/files/routes/posts.py b/files/routes/posts.py
index 179c458f7..50a1e2d41 100644
--- a/files/routes/posts.py
+++ b/files/routes/posts.py
@@ -612,7 +612,7 @@ def thumbnail_thread(pid):
 	headers={"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.72 Safari/537.36"}
 
 	try:
-		x=requests.get(fetch_url, headers=headers, timeout=5)
+		x=requests.get(fetch_url, headers=headers, timeout=5, proxies=proxies)
 	except:
 		db.close()
 		return
@@ -663,7 +663,7 @@ def thumbnail_thread(pid):
 
 	for url in thumb_candidate_urls:
 		try:
-			image_req=requests.get(url, headers=headers, timeout=5)
+			image_req=requests.get(url, headers=headers, timeout=5, proxies=proxies)
 		except:
 			continue
 
@@ -918,18 +918,18 @@ def submit_post(v, sub=None):
 			url = unquote(url).replace('?t', '&t')
 			yt_id = url.split('https://youtube.com/watch?v=')[1].split('&')[0].split('%')[0]
-			req = requests.get(f"https://www.googleapis.com/youtube/v3/videos?id={yt_id}&key={YOUTUBE_KEY}&part=contentDetails", timeout=5).json()
+			if yt_id_regex.fullmatch(yt_id):
+				req = requests.get(f"https://www.googleapis.com/youtube/v3/videos?id={yt_id}&key={YOUTUBE_KEY}&part=contentDetails", timeout=5).json()
+				if req.get('items'):
+					params = parse_qs(urlparse(url).query)
+					t = params.get('t', params.get('start', [0]))[0]
+					if isinstance(t, str): t = t.replace('s','')
 
-			if req.get('items'):
-				params = parse_qs(urlparse(url).query)
-				t = params.get('t', params.get('start', [0]))[0]
-				if isinstance(t, str): t = t.replace('s','')
-
-			embed = f'<lite-youtube videoid="{yt_id}" params="autoplay=1&modestbranding=1{f"&start={t}" if t else ""}"></lite-youtube>'
+					embed = f'<lite-youtube videoid="{yt_id}" params="autoplay=1&modestbranding=1{f"&start={t}" if t else ""}"></lite-youtube>'
 
 		elif app.config['SERVER_NAME'] in domain and "/post/" in url and "context" not in url:
 			id = url.split("/post/")[1]
@@ -1469,7 +1469,7 @@ def get_post_title(v):
 	url = request.values.get("url")
 	if not url: abort(400)
 
-	try: x = requests.get(url, headers=titleheaders, timeout=5, proxies={"http":"http://127.0.0.1:18080","https":"http://127.0.0.1:18080"} )
+	try: x = requests.get(url, headers=titleheaders, timeout=5, proxies=proxies)
 	except: abort(400)
 
 	soup = BeautifulSoup(x.content, 'lxml')
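
The const.py changes above cap every quantifier that runs over user-supplied URLs at {5,250} characters (and the final domain-label group of email_regex at {2,100}), and the new yt_id_regex is checked with fullmatch() in sanitize.py and in submit_post() before an extracted YouTube ID reaches the Data API call or the embed markup. Below is a minimal standalone sketch of that flow; it reuses two patterns verbatim from the patch, the example URLs are only illustrative, and note that the ID character class as written omits '-' and '_', which do appear in real 11-character YouTube IDs.

# Sketch of the validation flow this patch introduces. yt_id_regex and
# image_regex are copied verbatim from const.py above; the URLs below are
# illustrative inputs only, not values used by the site.
import re

yt_id_regex = re.compile('[A-Za-z0-9]{5,15}', flags=re.A)
image_regex = re.compile("(^https:\/\/[\w\-.#&/=\?@%+]{5,250}\.(png|jpg|jpeg|gif|webp|maxwidth=9999|fidelity=high)($|\s))", flags=re.I|re.M|re.A)

# The {5,250} bound keeps the pattern from walking an arbitrarily long
# attacker-supplied path the way the old unbounded '+' quantifier did.
print(bool(image_regex.match("https://example.com/pic.png")))         # True
print(bool(image_regex.match("https://" + "a" * 5000 + "/pic.png")))  # False: path exceeds the 250-char bound

# fullmatch() on the extracted ID mirrors sanitize.py and submit_post():
# only a short ASCII token may reach the YouTube API or the lite-youtube embed.
url = "https://youtube.com/watch?v=dQw4w9WgXcQ&t=43s"
yt_id = url.split('https://youtube.com/watch?v=')[1].split('&')[0].split('%')[0]
if yt_id_regex.fullmatch(yt_id):
    print(f"ok to embed videoid={yt_id}")  # IDs containing '-' or '_' would be rejected here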
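
const.py also gains a module-level proxies constant, and the outbound requests.get() calls in thumbnail_thread() and get_post_title() now pass proxies=proxies instead of repeating the dict inline, so fetches of user-supplied URLs go through the local proxy on 127.0.0.1:18080. A short sketch of how requests consumes that dict, assuming a proxy is actually listening on that address; the fetch() helper and the example URL are hypothetical.

# Sketch only: requires an HTTP proxy listening on 127.0.0.1:18080,
# as the patched code assumes in production.
import requests

proxies = {"http": "http://127.0.0.1:18080", "https": "http://127.0.0.1:18080"}

def fetch(url):
    # Mirrors the pattern used in thumbnail_thread() and get_post_title():
    # short timeout, both schemes routed through the same local proxy.
    try:
        return requests.get(url, timeout=5, proxies=proxies)
    except requests.RequestException:
        return None

r = fetch("https://example.com")
print(r.status_code if r is not None else "fetch failed")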