re-refactor normalize_url
parent
2af325de97
commit
654ddc4157
|
@ -1031,7 +1031,8 @@ image_check_regex = re.compile(f'!\[\]\(((?!(https:\/\/([a-z0-9-]+\.)*({hosts})\
|
||||||
video_sub_regex = re.compile(f'(<p>[^<]*)(https:\/\/([a-z0-9-]+\.)*({hosts})\/[\w:~,()\-.#&\/=?@%;+]*?\.(mp4|webm|mov))', flags=re.A)
|
video_sub_regex = re.compile(f'(<p>[^<]*)(https:\/\/([a-z0-9-]+\.)*({hosts})\/[\w:~,()\-.#&\/=?@%;+]*?\.(mp4|webm|mov))', flags=re.A)
|
||||||
audio_sub_regex = re.compile(f'(<p>[^<]*)(https:\/\/([a-z0-9-]+\.)*({hosts})\/[\w:~,()\-.#&\/=?@%;+]*?\.(mp3|wav|ogg|aac|m4a|flac))', flags=re.A)
|
audio_sub_regex = re.compile(f'(<p>[^<]*)(https:\/\/([a-z0-9-]+\.)*({hosts})\/[\w:~,()\-.#&\/=?@%;+]*?\.(mp3|wav|ogg|aac|m4a|flac))', flags=re.A)
|
||||||
|
|
||||||
imgur_regex = re.compile('(https:\/\/i\.imgur\.com\/([a-z0-9]+))\.(jpg|png|jpeg|webp)(?!<\/(code|pre|a)>)', flags=re.I|re.A)
|
imgur_regex = re.compile('(https:\/\/i\.imgur\.com\/[a-z0-9]+)\.(jpg|png|jpeg|webp)(?!<\/(code|pre|a)>)', flags=re.I|re.A)
|
||||||
|
giphy_tenor_regex = re.compile('(https:\/\/(media\.giphy\.com|c\.tenor\.com)\/[\/\-a-z0-9]+)\.gif(?!<\/(code|pre|a)>)', flags=re.I|re.A)
|
||||||
|
|
||||||
youtube_regex = re.compile('(<p>[^<]*)(https:\/\/youtube\.com\/watch\?v\=([a-z0-9-_]{5,20})[\w\-.#&/=\?@%+]*)', flags=re.I|re.A)
|
youtube_regex = re.compile('(<p>[^<]*)(https:\/\/youtube\.com\/watch\?v\=([a-z0-9-_]{5,20})[\w\-.#&/=\?@%+]*)', flags=re.I|re.A)
|
||||||
|
|
||||||
|
|
|
@ -201,7 +201,7 @@ def sanitize(sanitized, alert=False, comment=False, edit=False):
|
||||||
sanitized = sanitized.replace(i.group(0), f'''{i.group(1)}<a href="/id/{u.id}"><img loading="lazy" src="/pp/{u.id}">@{u.username}</a>''', 1)
|
sanitized = sanitized.replace(i.group(0), f'''{i.group(1)}<a href="/id/{u.id}"><img loading="lazy" src="/pp/{u.id}">@{u.username}</a>''', 1)
|
||||||
|
|
||||||
|
|
||||||
sanitized = imgur_regex.sub(r'\1_d.webp?maxwidth=9999&fidelity=high', sanitized)
|
sanitized = normalize_url(sanitized)
|
||||||
|
|
||||||
soup = BeautifulSoup(sanitized, 'lxml')
|
soup = BeautifulSoup(sanitized, 'lxml')
|
||||||
|
|
||||||
|
@ -245,11 +245,6 @@ def sanitize(sanitized, alert=False, comment=False, edit=False):
|
||||||
|
|
||||||
sanitized = render_emoji(sanitized, emoji_regex2, edit, marseys_used)
|
sanitized = render_emoji(sanitized, emoji_regex2, edit, marseys_used)
|
||||||
|
|
||||||
for rd in ["://reddit.com", "://new.reddit.com", "://www.reddit.com", "://redd.it", "://libredd.it", "://teddit.net"]:
|
|
||||||
sanitized = sanitized.replace(rd, "://old.reddit.com")
|
|
||||||
|
|
||||||
sanitized = sanitize_url(sanitized)
|
|
||||||
|
|
||||||
sanitized = sanitized.replace('&amp;','&')
|
sanitized = sanitized.replace('&amp;','&')
|
||||||
|
|
||||||
if "https://youtube.com/watch?v=" in sanitized: sanitized = sanitized.replace("?t=", "&t=")
|
if "https://youtube.com/watch?v=" in sanitized: sanitized = sanitized.replace("?t=", "&t=")
|
||||||
|
@ -378,8 +373,10 @@ def filter_emojis_only(title, edit=False, graceful=False):
|
||||||
if len(title) > 1500 and not graceful: abort(400)
|
if len(title) > 1500 and not graceful: abort(400)
|
||||||
else: return title
|
else: return title
|
||||||
|
|
||||||
def sanitize_url(url):
|
def normalize_url(url):
|
||||||
# NB: Used in this file to sanitize all URLs in bulk text.
|
for x in ["://reddit.com", "://new.reddit.com", "://www.reddit.com", "://redd.it", "://libredd.it", "://teddit.net"]:
|
||||||
|
url = url.replace(x, "://old.reddit.com")
|
||||||
|
|
||||||
url = url.replace("nitter.net", "twitter.com") \
|
url = url.replace("nitter.net", "twitter.com") \
|
||||||
.replace("old.reddit.com/gallery", "reddit.com/gallery") \
|
.replace("old.reddit.com/gallery", "reddit.com/gallery") \
|
||||||
.replace("https://youtu.be/", "https://youtube.com/watch?v=") \
|
.replace("https://youtu.be/", "https://youtube.com/watch?v=") \
|
||||||
|
@ -397,17 +394,7 @@ def sanitize_url(url):
|
||||||
.replace("https://streamable.com/", "https://streamable.com/e/") \
|
.replace("https://streamable.com/", "https://streamable.com/e/") \
|
||||||
.replace("https://streamable.com/e/e/", "https://streamable.com/e/")
|
.replace("https://streamable.com/e/e/", "https://streamable.com/e/")
|
||||||
|
|
||||||
return url
|
url = imgur_regex.sub(r'\1_d.webp?maxwidth=9999&fidelity=high', url)
|
||||||
|
url = giphy_tenor_regex.sub(r'\1.webp', url)
|
||||||
|
|
||||||
def normalize_url(url):
|
return url
|
||||||
url = sanitize_url(url)
|
|
||||||
|
|
||||||
if "/i.imgur.com/" in url:
|
|
||||||
url = url.replace(".png", ".webp").replace(".jpg", ".webp").replace(".jpeg", ".webp")
|
|
||||||
elif "/media.giphy.com/" in url or "/c.tenor.com/" in url:
|
|
||||||
url = url.replace(".gif", ".webp")
|
|
||||||
elif "/i.ibb.co/" in url:
|
|
||||||
url = url.replace(".png", ".webp").replace(".jpg", ".webp")\
|
|
||||||
.replace(".jpeg", ".webp").replace(".gif", ".webp")
|
|
||||||
|
|
||||||
return url
|
|
|
@ -728,9 +728,6 @@ def api_is_repost():
|
||||||
url = request.values.get('url')
|
url = request.values.get('url')
|
||||||
if not url: abort(400)
|
if not url: abort(400)
|
||||||
|
|
||||||
for rd in ("://reddit.com", "://new.reddit.com", "://www.reddit.com", "://redd.it", "://libredd.it", "://teddit.net"):
|
|
||||||
url = url.replace(rd, "://old.reddit.com")
|
|
||||||
|
|
||||||
url = normalize_url(url)
|
url = normalize_url(url)
|
||||||
parsed_url = urlparse(url)
|
parsed_url = urlparse(url)
|
||||||
|
|
||||||
|
@ -819,9 +816,6 @@ def submit_post(v, sub=None):
|
||||||
embed = None
|
embed = None
|
||||||
|
|
||||||
if url:
|
if url:
|
||||||
for rd in ("://reddit.com", "://new.reddit.com", "://www.reddit.com", "://redd.it", "://libredd.it", "://teddit.net"):
|
|
||||||
url = url.replace(rd, "://old.reddit.com")
|
|
||||||
|
|
||||||
url = normalize_url(url)
|
url = normalize_url(url)
|
||||||
parsed_url = urlparse(url)
|
parsed_url = urlparse(url)
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue