Refactor runtime URL normalization (I put the function in comment.py because there were weird import errors that I didn't want to fix)

2022-06-23 17:47:57 +02:00 · 2022-06-23 17:47:57 +02:00 · 39cf7fc48b
parent af03f8f3bf
commit 39cf7fc48b
4 changed files with 41 additions and 39 deletions
--- a/files/classes/comment.py
+++ b/files/classes/comment.py
@ -15,6 +15,15 @@ from .votes import CommentVote
 from math import floor


+def normalize_urls_runtime(body, v):
+	"""Swap old.reddit.com links in body for v.reddit and, if v.nitter is set, twitter links for nitter.net; a falsy v returns body as-is."""
+	if not v: return body
+
+	rewritten = body.replace("https://old.reddit.com/r/", f'https://{v.reddit}/r/')
+	if v.nitter:
+		rewritten = twitter_to_nitter_regex.sub(r'https://nitter.net/\1', rewritten)
+	return rewritten
+
 def sort_comments(sort, comments):

 	if sort == 'new':
@ -370,10 +379,8 @@ class Comment(Base):
 		if body:
 			body = censor_slurs(body, v)

-			if v:
-				body = body.replace("old.reddit.com", v.reddit)
+			body = normalize_urls_runtime(body, v)

-				if v.nitter and not '/i/' in body and '/retweets' not in body: body = body.replace("www.twitter.com", "nitter.net").replace("twitter.com", "nitter.net")

 			if v and v.controversial:
 				captured = []
--- a/files/classes/submission.py
+++ b/files/classes/submission.py
@ -10,7 +10,7 @@ from files.__main__ import Base
 from files.helpers.const import *
 from files.helpers.lazy import lazy
 from .flags import Flag
-from .comment import Comment
+from .comment import Comment, normalize_urls_runtime
 from flask import g
 from .sub import *
 from .votes import CommentVote
@ -366,21 +366,22 @@ class Submission(Base):

 	@lazy
 	def realurl(self, v):
-		if v and self.url and self.url.startswith("https://old.reddit.com/"):
+		"""Return self.url as viewer v should see it: '' if unset, SITE_FULL-prefixed if it starts with '/', else normalized."""
+		url = self.url
+		if not url: return ''
+
+		if url.startswith('/'): return SITE_FULL + url
+
+		# Append context/sort while the link still has its old.reddit.com prefix:
+		# normalize_urls_runtime swaps in v.reddit, after which this prefix check would never match.
+		if url.startswith("https://old.reddit.com/r/") and '/comments/' in url and "sort=" not in url:
+			if "?" in url: url += "&context=9" 
+			else: url += "?context=8"
+			if v and v.controversial: url += "&sort=controversial"
+
+		return normalize_urls_runtime(url, v)

-			url = self.url.replace("old.reddit.com", v.reddit)

-			if '/comments/' in url and "sort=" not in url:
-				if "?" in url: url += "&context=9" 
-				else: url += "?context=8"
-				if v.controversial: url += "&sort=controversial"
-			return url
-		elif self.url:
-			if v and v.nitter and '/i/' not in self.url and '/retweets' not in self.url: return self.url.replace("www.twitter.com", "nitter.net").replace("twitter.com", "nitter.net")
-			if self.url.startswith('/'): return SITE_FULL + self.url
-			return self.url
-		else: return ""
- 
 	def realbody(self, v):
 		if self.club and not (v and (v.paid_dues or v.id == self.author_id)): return f"<p>{CC} ONLY</p>"

@ -388,10 +389,7 @@ class Submission(Base):

 		body = censor_slurs(body, v)

-		if v:
-			body = body.replace("old.reddit.com", v.reddit)
-
-			if v.nitter and '/i/' not in body and '/retweets' not in body: body = body.replace("www.twitter.com", "nitter.net").replace("twitter.com", "nitter.net")
+		body = normalize_urls_runtime(body, v)

 		if v and v.shadowbanned and v.id == self.author_id and 86400 > time.time() - self.created_utc > 20:
 			ti = max(int((time.time() - self.created_utc)/60), 1)
@ -454,11 +452,7 @@ class Submission(Base):

 		body = censor_slurs(body, v)

-		if v:
-			body = body.replace("old.reddit.com", v.reddit)
-
-			if v.nitter and '/i/' not in body and '/retweets' not in body: body = body.replace("www.twitter.com", "nitter.net").replace("twitter.com", "nitter.net")
-
+		body = normalize_urls_runtime(body, v)
 		return body

 	@lazy
--- a/files/helpers/const.py
+++ b/files/helpers/const.py
@ -1025,4 +1025,6 @@ greentext_regex = re.compile("(\n|^)>([^ >][^\n]*)", flags=re.A)

 ascii_only_regex = re.compile("[ -~]+", flags=re.A)

+twitter_to_nitter_regex = re.compile(r"https://twitter\.com/(\w{4,15}(/status/\d+[^/]*)?)", flags=re.A)
+
 def make_name(*args, **kwargs): return request.base_url
--- a/files/helpers/sanitize.py
+++ b/files/helpers/sanitize.py
@ -384,22 +384,21 @@ def filter_emojis_only(title, edit=False, graceful=False):
 	else: return title

 def normalize_url(url):
-	for x in ["://reddit.com", "://new.reddit.com", "://www.reddit.com", "://redd.it", "://libredd.it", "://teddit.net"]:
-		url = url.replace(x, "://old.reddit.com")
+	for x in ["reddit.com", "new.reddit.com", "www.reddit.com", "redd.it", "ibredd.it", "teddit.net"]:
+		url = url.replace(f'https://{x}/r/', "https://old.reddit.com/r/")

-	url = url.replace("old.reddit.com/gallery", "reddit.com/gallery") \
-			 .replace("https://youtu.be/", "https://youtube.com/watch?v=") \
+	url = url.replace("https://youtu.be/", "https://youtube.com/watch?v=") \
 			 .replace("https://music.youtube.com/watch?v=", "https://youtube.com/watch?v=") \
 			 .replace("https://youtube.com/shorts/", "https://youtube.com/watch?v=") \
-			 .replace("https://mobile.twitter", "https://twitter") \
-			 .replace("https://m.facebook", "https://facebook") \
-			 .replace("m.wikipedia.org", "wikipedia.org") \
-			 .replace("https://m.youtube", "https://youtube") \
-			 .replace("https://www.youtube", "https://youtube") \
-			 .replace("https://www.twitter", "https://twitter") \
-			 .replace("https://www.instagram", "https://instagram") \
-			 .replace("https://www.tiktok", "https://tiktok") \
-			 .replace("https://www.streamable", "https://streamable") \
+			 .replace("https://mobile.twitter.com", "https://twitter.com") \
+			 .replace("https://m.facebook.com", "https://facebook.com") \
+			 .replace("https://m.wikipedia.org", "https://wikipedia.org") \
+			 .replace("https://m.youtube.com", "https://youtube.com") \
+			 .replace("https://www.youtube.com", "https://youtube.com") \
+			 .replace("https://www.twitter.com", "https://twitter.com") \
+			 .replace("https://www.instagram.com", "https://instagram.com") \
+			 .replace("https://www.tiktok.com", "https://tiktok.com") \
+			 .replace("https://www.streamable.com", "https://streamable.com") \
 			 .replace("https://streamable.com/", "https://streamable.com/e/") \
 			 .replace("https://streamable.com/e/e/", "https://streamable.com/e/")