From 39cf7fc48b35191c8a4179c07a9ab61c57de309c Mon Sep 17 00:00:00 2001
From: Aevann1
Date: Thu, 23 Jun 2022 17:47:57 +0200
Subject: [PATCH] refactor normalizing urls at runtime (I put the function in
 comment.py because there were weird import errors that I didn't want to fix)

---
 files/classes/comment.py    | 13 +++++++++---
 files/classes/submission.py | 40 ++++++++++++++++---------------------
 files/helpers/const.py      |  2 ++
 files/helpers/sanitize.py   | 25 +++++++++++------------
 4 files changed, 41 insertions(+), 39 deletions(-)

diff --git a/files/classes/comment.py b/files/classes/comment.py
index b3f680ee4..a7e28a4ae 100644
--- a/files/classes/comment.py
+++ b/files/classes/comment.py
@@ -15,6 +15,15 @@ from .votes import CommentVote
 from math import floor
 
 
+def normalize_urls_runtime(body, v):
+
+	if v:
+		body = body.replace("https://old.reddit.com/r/", f'https://{v.reddit}/r/')
+
+		if v.nitter: body = twitter_to_nitter_regex.sub(r'https://nitter.net/\1', body)
+
+	return body
+
 def sort_comments(sort, comments):
 
 	if sort == 'new':
@@ -370,10 +379,8 @@ class Comment(Base):
 		if body:
 			body = censor_slurs(body, v)
 
-			if v:
-				body = body.replace("old.reddit.com", v.reddit)
+			body = normalize_urls_runtime(body, v)
 
-				if v.nitter and not '/i/' in body and '/retweets' not in body: body = body.replace("www.twitter.com", "nitter.net").replace("twitter.com", "nitter.net")
 
 		if v and v.controversial:
 			captured = []
diff --git a/files/classes/submission.py b/files/classes/submission.py
index e82d769c9..aaffb1c05 100644
--- a/files/classes/submission.py
+++ b/files/classes/submission.py
@@ -10,7 +10,7 @@ from files.__main__ import Base
 from files.helpers.const import *
 from files.helpers.lazy import lazy
 from .flags import Flag
-from .comment import Comment
+from .comment import Comment, normalize_urls_runtime
 from flask import g
 from .sub import *
 from .votes import CommentVote
@@ -366,21 +366,22 @@ class Submission(Base):
 
 	@lazy
 	def realurl(self, v):
-		if v and self.url and self.url.startswith("https://old.reddit.com/"):
+		url = self.url
+
+		if not url: return ''
+
+		if url.startswith('/'): return SITE_FULL + url
+
+		url = normalize_urls_runtime(url, v)
+
+		if url.startswith("https://old.reddit.com/r/") and '/comments/' in url and "sort=" not in url:
+			if "?" in url: url += "&context=9"
+			else: url += "?context=8"
+			if v and v.controversial: url += "&sort=controversial"
+
+		return url
 
-			url = self.url.replace("old.reddit.com", v.reddit)
-			if '/comments/' in url and "sort=" not in url:
-				if "?" in url: url += "&context=9"
-				else: url += "?context=8"
-				if v.controversial: url += "&sort=controversial"
-			return url
-		elif self.url:
-			if v and v.nitter and '/i/' not in self.url and '/retweets' not in self.url: return self.url.replace("www.twitter.com", "nitter.net").replace("twitter.com", "nitter.net")
-			if self.url.startswith('/'): return SITE_FULL + self.url
-			return self.url
-		else: return ""
-
 	def realbody(self, v):
 		if self.club and not (v and (v.paid_dues or v.id == self.author_id)): return f"<p>{CC} ONLY</p>"
@@ -388,10 +389,7 @@ class Submission(Base):
 
 		body = censor_slurs(body, v)
 
-		if v:
-			body = body.replace("old.reddit.com", v.reddit)
-
-			if v.nitter and '/i/' not in body and '/retweets' not in body: body = body.replace("www.twitter.com", "nitter.net").replace("twitter.com", "nitter.net")
+		body = normalize_urls_runtime(body, v)
 
 		if v and v.shadowbanned and v.id == self.author_id and 86400 > time.time() - self.created_utc > 20:
 			ti = max(int((time.time() - self.created_utc)/60), 1)
@@ -454,11 +452,7 @@ class Submission(Base):
 
 		body = censor_slurs(body, v)
 
-		if v:
-			body = body.replace("old.reddit.com", v.reddit)
-
-			if v.nitter and '/i/' not in body and '/retweets' not in body: body = body.replace("www.twitter.com", "nitter.net").replace("twitter.com", "nitter.net")
-
+		body = normalize_urls_runtime(body, v)
 		return body
 
 	@lazy
diff --git a/files/helpers/const.py b/files/helpers/const.py
index a950ec5cf..a81d2035d 100644
--- a/files/helpers/const.py
+++ b/files/helpers/const.py
@@ -1025,4 +1025,6 @@ greentext_regex = re.compile("(\n|^)>([^ >][^\n]*)", flags=re.A)
 
 ascii_only_regex = re.compile("[ -~]+", flags=re.A)
 
+twitter_to_nitter_regex = re.compile("https:\/\/twitter.com\/(\w{4,15}(\/status\/\d+[^/]*)?)", flags=re.A)
+
 def make_name(*args, **kwargs): return request.base_url
diff --git a/files/helpers/sanitize.py b/files/helpers/sanitize.py
index 1199eb136..8ab9a2c23 100644
--- a/files/helpers/sanitize.py
+++ b/files/helpers/sanitize.py
@@ -384,22 +384,21 @@ def filter_emojis_only(title, edit=False, graceful=False):
 	else: return title
 
 def normalize_url(url):
-	for x in ["://reddit.com", "://new.reddit.com", "://www.reddit.com", "://redd.it", "://libredd.it", "://teddit.net"]:
-		url = url.replace(x, "://old.reddit.com")
+	for x in ["reddit.com", "new.reddit.com", "www.reddit.com", "redd.it", "libredd.it", "teddit.net"]:
+		url = url.replace(f'https://{x}/r/', "https://old.reddit.com/r/")
 
-	url = url.replace("old.reddit.com/gallery", "reddit.com/gallery") \
-		.replace("https://youtu.be/", "https://youtube.com/watch?v=") \
+	url = url.replace("https://youtu.be/", "https://youtube.com/watch?v=") \
 		.replace("https://music.youtube.com/watch?v=", "https://youtube.com/watch?v=") \
 		.replace("https://youtube.com/shorts/", "https://youtube.com/watch?v=") \
-		.replace("https://mobile.twitter", "https://twitter") \
-		.replace("https://m.facebook", "https://facebook") \
-		.replace("m.wikipedia.org", "wikipedia.org") \
-		.replace("https://m.youtube", "https://youtube") \
-		.replace("https://www.youtube", "https://youtube") \
-		.replace("https://www.twitter", "https://twitter") \
-		.replace("https://www.instagram", "https://instagram") \
-		.replace("https://www.tiktok", "https://tiktok") \
-		.replace("https://www.streamable", "https://streamable") \
+		.replace("https://mobile.twitter.com", "https://twitter.com") \
+		.replace("https://m.facebook.com", "https://facebook.com") \
+		.replace("https://m.wikipedia.org", "https://wikipedia.org") \
+		.replace("https://m.youtube.com", "https://youtube.com") \
+		.replace("https://www.youtube.com", "https://youtube.com") \
+		.replace("https://www.twitter.com", "https://twitter.com") \
+		.replace("https://www.instagram.com", "https://instagram.com") \
+		.replace("https://www.tiktok.com", "https://tiktok.com") \
+		.replace("https://www.streamable.com", "https://streamable.com") \
 		.replace("https://streamable.com/", "https://streamable.com/e/") \
 		.replace("https://streamable.com/e/e/", "https://streamable.com/e/")