From a560958c3adab652276b24e69f4ffd2794f1c7a3 Mon Sep 17 00:00:00 2001 From: Aevann Date: Mon, 23 Jan 2023 04:06:56 +0200 Subject: [PATCH] fix youtube timestamps --- files/helpers/regex.py | 2 +- files/helpers/sanitize.py | 37 +++++++++++++++++++++++++------------ files/routes/posts.py | 17 ++--------------- 3 files changed, 28 insertions(+), 28 deletions(-) diff --git a/files/helpers/regex.py b/files/helpers/regex.py index 0eee98294..08a9b4282 100644 --- a/files/helpers/regex.py +++ b/files/helpers/regex.py @@ -85,7 +85,7 @@ imgur_regex = re.compile(f'(https:\/\/i\.imgur\.com\/[a-z0-9]+)\.({image_regex_e giphy_regex = re.compile('(https:\/\/media\.giphy\.com\/media\/[a-z0-9]+\/giphy)\.gif', flags=re.I|re.A) -youtube_regex = re.compile('(

[^<]*)(https:\/\/youtube\.com\/watch\?v\=([a-z0-9-_]{5,20})[\w\-.#&/=\?@%+]*)', flags=re.I|re.A) +youtube_regex = re.compile('(

[^<]*)(https:\/\/youtube\.com\/watch\?[\w\-.#&/=?@%+]{7,})', flags=re.I|re.A) yt_id_regex = re.compile('[a-z0-9-_]{5,20}', flags=re.I|re.A) link_fix_regex = re.compile("(\[.*?\]\()(?!http|\/)(.*?\))(?!([^<]*<\/(code|pre|a)>|[^`]*`))", flags=re.A) diff --git a/files/helpers/sanitize.py b/files/helpers/sanitize.py index d0176cc2f..7c98380b2 100644 --- a/files/helpers/sanitize.py +++ b/files/helpers/sanitize.py @@ -237,6 +237,27 @@ chud_images = listdir("files/assets/images/chud") chud_images = [f'![](/i/chud/{f})' for f in chud_images] chud_images.extend([':#trumpjaktalking:', ':#reposthorse:']) +def handle_youtube(url): + html = None + params = parse_qs(urlparse(url).query, keep_blank_values=True) + id = params.get('v')[0] + + t = None + split = id.split('?t=') + if len(split) == 2: + id = split[0] + t = split[1] + + if yt_id_regex.fullmatch(id): + if not t: + t = params.get('t', params.get('start', [0]))[0] + if isinstance(t, str): t = t.replace('s','') + html = f'' + return html + @with_sigalrm_timeout(10) def sanitize(sanitized, golden=True, limit_pings=0, showmore=True, count_marseys=False, torture=False, sidebar=False, snappy=False): sanitized = sanitized.strip() @@ -356,17 +377,9 @@ def sanitize(sanitized, golden=True, limit_pings=0, showmore=True, count_marseys if i.group(0) in captured: continue captured.append(i.group(0)) - params = parse_qs(urlparse(i.group(2)).query, keep_blank_values=True) - t = params.get('t', params.get('start', [0]))[0] - if isinstance(t, str): t = t.replace('s','') - - htmlsource = f'{i.group(1)}' - - sanitized = sanitized.replace(i.group(0), htmlsource) + html = handle_youtube(i.group(0)) + if html: + sanitized = sanitized.replace(i.group(0), html) sanitized = video_sub_regex.sub(r'\1

', sanitized) sanitized = audio_sub_regex.sub(r'\1', sanitized) @@ -472,12 +485,12 @@ def normalize_url(url): url = url.replace("https://youtu.be/", "https://youtube.com/watch?v=") \ .replace("https://music.youtube.com/watch?v=", "https://youtube.com/watch?v=") \ .replace("https://www.youtube.com", "https://youtube.com") \ + .replace("https://m.youtube.com", "https://youtube.com") \ .replace("https://youtube.com/shorts/", "https://youtube.com/watch?v=") \ .replace("https://youtube.com/v/", "https://youtube.com/watch?v=") \ .replace("https://mobile.twitter.com", "https://twitter.com") \ .replace("https://m.facebook.com", "https://facebook.com") \ .replace("https://m.wikipedia.org", "https://wikipedia.org") \ - .replace("https://m.youtube.com", "https://youtube.com") \ .replace("https://www.twitter.com", "https://twitter.com") \ .replace("https://www.instagram.com", "https://instagram.com") \ .replace("https://www.tiktok.com", "https://tiktok.com") \ diff --git a/files/routes/posts.py b/files/routes/posts.py index ff2769411..fe4eb9693 100644 --- a/files/routes/posts.py +++ b/files/routes/posts.py @@ -630,21 +630,8 @@ def submit_post(v:User, sub=None): embed = requests.get("https://publish.twitter.com/oembed", params={"url":url, "omit_script":"t"}, timeout=5).json()["html"] embed = embed.replace('' - + elif url.startswith('https://youtube.com/watch?'): + embed = handle_youtube(url) elif SITE in domain and "/post/" in url and "context" not in url and url.count('/') < 6: id = url.split("/post/")[1] if "/" in id: id = id.split("/")[0]