From 39cf7fc48b35191c8a4179c07a9ab61c57de309c Mon Sep 17 00:00:00 2001
From: Aevann1
Date: Thu, 23 Jun 2022 17:47:57 +0200
Subject: [PATCH] refactor normalizing urls at runtime (I put the function in
 comment.py because there were weird import errors that I didn't want to fix)

---
 files/classes/comment.py    | 13 +++++++++---
 files/classes/submission.py | 40 ++++++++++++++++---------------------
 files/helpers/const.py      |  2 ++
 files/helpers/sanitize.py   | 25 +++++++++++------------
 4 files changed, 41 insertions(+), 39 deletions(-)

diff --git a/files/classes/comment.py b/files/classes/comment.py
index b3f680ee4..a7e28a4ae 100644
--- a/files/classes/comment.py
+++ b/files/classes/comment.py
@@ -15,6 +15,15 @@ from .votes import CommentVote
 from math import floor
 
 
+def normalize_urls_runtime(body, v):
+
+	if v:
+		body = body.replace("https://old.reddit.com/r/", f'https://{v.reddit}/r/')
+
+		if v.nitter: body = twitter_to_nitter_regex.sub(r'https://nitter.net/\1', body)
+
+	return body
+
 def sort_comments(sort, comments):
 
 	if sort == 'new':
@@ -370,10 +379,8 @@ class Comment(Base):
 		if body:
 			body = censor_slurs(body, v)
 
-			if v:
-				body = body.replace("old.reddit.com", v.reddit)
+			body = normalize_urls_runtime(body, v)
 
-				if v.nitter and not '/i/' in body and '/retweets' not in body: body = body.replace("www.twitter.com", "nitter.net").replace("twitter.com", "nitter.net")
 
 		if v and v.controversial:
 			captured = []
diff --git a/files/classes/submission.py b/files/classes/submission.py
index e82d769c9..aaffb1c05 100644
--- a/files/classes/submission.py
+++ b/files/classes/submission.py
@@ -10,7 +10,7 @@ from files.__main__ import Base
 from files.helpers.const import *
 from files.helpers.lazy import lazy
 from .flags import Flag
-from .comment import Comment
+from .comment import Comment, normalize_urls_runtime
 from flask import g
 from .sub import *
 from .votes import CommentVote
@@ -366,21 +366,22 @@ class Submission(Base):
 
 	@lazy
 	def realurl(self, v):
-		if v and self.url and self.url.startswith("https://old.reddit.com/"):
+		url = self.url
+
+		if not url: return ''
+
+		if url.startswith('/'): return SITE_FULL + url
+
+		url = normalize_urls_runtime(url, v)
+
+		if url.startswith("https://old.reddit.com/r/") and '/comments/' in url and "sort=" not in url:
+			if "?" in url: url += "&context=9"
+			else: url += "?context=8"
+			if v and v.controversial: url += "&sort=controversial"
+
+		return url
 
-			url = self.url.replace("old.reddit.com", v.reddit)
-			if '/comments/' in url and "sort=" not in url:
-				if "?" in url: url += "&context=9"
-				else: url += "?context=8"
-				if v.controversial: url += "&sort=controversial"
-			return url
-		elif self.url:
-			if v and v.nitter and '/i/' not in self.url and '/retweets' not in self.url: return self.url.replace("www.twitter.com", "nitter.net").replace("twitter.com", "nitter.net")
-			if self.url.startswith('/'): return SITE_FULL + self.url
-			return self.url
-		else: return ""
-
 	def realbody(self, v):
 		if self.club and not (v and (v.paid_dues or v.id == self.author_id)): return f"<p>{CC} ONLY</p>"
@@ -388,10 +389,7 @@ class Submission(Base):
 
 		body = censor_slurs(body, v)
 
-		if v:
-			body = body.replace("old.reddit.com", v.reddit)
-
-			if v.nitter and '/i/' not in body and '/retweets' not in body: body = body.replace("www.twitter.com", "nitter.net").replace("twitter.com", "nitter.net")
+		body = normalize_urls_runtime(body, v)
 
 		if v and v.shadowbanned and v.id == self.author_id and 86400 > time.time() - self.created_utc > 20:
 			ti = max(int((time.time() - self.created_utc)/60), 1)
@@ -454,11 +452,7 @@ class Submission(Base):
 
 		body = censor_slurs(body, v)
 
-		if v:
-			body = body.replace("old.reddit.com", v.reddit)
-
-			if v.nitter and '/i/' not in body and '/retweets' not in body: body = body.replace("www.twitter.com", "nitter.net").replace("twitter.com", "nitter.net")
-
+		body = normalize_urls_runtime(body, v)
 		return body
 
 	@lazy
diff --git a/files/helpers/const.py b/files/helpers/const.py
index a950ec5cf..a81d2035d 100644
--- a/files/helpers/const.py
+++ b/files/helpers/const.py
@@ -1025,4 +1025,6 @@ greentext_regex = re.compile("(\n|^)>([^ >][^\n]*)", flags=re.A)
 
 ascii_only_regex = re.compile("[ -~]+", flags=re.A)
 
+twitter_to_nitter_regex = re.compile("https:\/\/twitter.com\/(\w{4,15}(\/status\/\d+[^/]*)?)", flags=re.A)
+
 def make_name(*args, **kwargs): return request.base_url
diff --git a/files/helpers/sanitize.py b/files/helpers/sanitize.py
index 1199eb136..8ab9a2c23 100644
--- a/files/helpers/sanitize.py
+++ b/files/helpers/sanitize.py
@@ -384,22 +384,21 @@ def filter_emojis_only(title, edit=False, graceful=False):
 	else: return title
 
 def normalize_url(url):
-	for x in ["://reddit.com", "://new.reddit.com", "://www.reddit.com", "://redd.it", "://libredd.it", "://teddit.net"]:
-		url = url.replace(x, "://old.reddit.com")
+	for x in ["reddit.com", "new.reddit.com", "www.reddit.com", "redd.it", "libredd.it", "teddit.net"]:
+		url = url.replace(f'https://{x}/r/', "https://old.reddit.com/r/")
 
-	url = url.replace("old.reddit.com/gallery", "reddit.com/gallery") \
-		.replace("https://youtu.be/", "https://youtube.com/watch?v=") \
+	url = url.replace("https://youtu.be/", "https://youtube.com/watch?v=") \
 		.replace("https://music.youtube.com/watch?v=", "https://youtube.com/watch?v=") \
 		.replace("https://youtube.com/shorts/", "https://youtube.com/watch?v=") \
-		.replace("https://mobile.twitter", "https://twitter") \
-		.replace("https://m.facebook", "https://facebook") \
-		.replace("m.wikipedia.org", "wikipedia.org") \
-		.replace("https://m.youtube", "https://youtube") \
-		.replace("https://www.youtube", "https://youtube") \
-		.replace("https://www.twitter", "https://twitter") \
-		.replace("https://www.instagram", "https://instagram") \
-		.replace("https://www.tiktok", "https://tiktok") \
-		.replace("https://www.streamable", "https://streamable") \
+		.replace("https://mobile.twitter.com", "https://twitter.com") \
+		.replace("https://m.facebook.com", "https://facebook.com") \
+		.replace("https://m.wikipedia.org", "https://wikipedia.org") \
+		.replace("https://m.youtube.com", "https://youtube.com") \
+		.replace("https://www.youtube.com", "https://youtube.com") \
+		.replace("https://www.twitter.com", "https://twitter.com") \
+		.replace("https://www.instagram.com", "https://instagram.com") \
+		.replace("https://www.tiktok.com", "https://tiktok.com") \
+		.replace("https://www.streamable.com", "https://streamable.com") \
 		.replace("https://streamable.com/", "https://streamable.com/e/") \
 		.replace("https://streamable.com/e/e/", "https://streamable.com/e/")