diff --git a/files/helpers/const.py b/files/helpers/const.py index c437270c04..427bd702ac 100644 --- a/files/helpers/const.py +++ b/files/helpers/const.py @@ -670,7 +670,7 @@ marsey_regex = re.compile("[a-z0-9]{1,30}", flags=re.A) tags_regex = re.compile("[a-z0-9: ]{1,200}", flags=re.A) -image_regex = re.compile("(^https:\/\/[\w\-.#&/=\?@%+]+\.(png|jpg|jpeg|gif|webp|maxwidth=9999|fidelity=high)($|\s))", flags=re.I|re.M|re.A) +image_regex = re.compile("(^https:\/\/[\w\-.#&/=\?@%+]{5,250}\.(png|jpg|jpeg|gif|webp|maxwidth=9999|fidelity=high)($|\s))", flags=re.I|re.M|re.A) valid_sub_regex = re.compile("^[a-zA-Z0-9_\-]{3,20}$", flags=re.A) @@ -688,13 +688,13 @@ title_regex = re.compile("[^\w ]", flags=re.A) based_regex = re.compile("based and (.{1,20}?)(-| )pilled", flags=re.I|re.A) -controversial_regex = re.compile('["> ](https:\/\/old\.reddit\.com/r/[a-zA-Z0-9_]{3,20}\/comments\/[\w\-.#&/=\?@%+]+)["< ]', flags=re.A) +controversial_regex = re.compile('["> ](https:\/\/old\.reddit\.com/r/[a-zA-Z0-9_]{3,20}\/comments\/[\w\-.#&/=\?@%+]{5,250})["< ]', flags=re.A) fishylinks_regex = re.compile("https?://\S+", flags=re.A) spoiler_regex = re.compile('''\|\|([^/'"]+)\|\|''', flags=re.A) -video_regex = re.compile('

(https:\/\/[\w\-.#&/=\?@%+]+\.(mp4|webm|mov))<\/a><\/p>', flags=re.I|re.A) -unlinked_regex = re.compile('''(^|\s|

)(https:\/\/[\w\-.#&/=\?@%+]+)''', flags=re.A) +video_regex = re.compile('

(https:\/\/[\w\-.#&/=\?@%+]{5,250}\.(mp4|webm|mov))<\/a><\/p>', flags=re.I|re.A) +unlinked_regex = re.compile('''(^|\s|

)(https:\/\/[\w\-.#&/=\?@%+]{5,250})''', flags=re.A) imgur_regex = re.compile('(https://i\.imgur\.com/([a-z0-9]+))\.(jpg|png|jpeg|webp)(?!)', flags=re.I|re.A) reddit_regex = re.compile('(^|\s|

)\/?((r|u)\/(\w|-){3,25})', flags=re.A) sub_regex = re.compile('(^|\s|

)\/?(h\/(\w|-){3,25})', flags=re.A) @@ -708,15 +708,17 @@ emoji_regex2 = re.compile('(?([\w:~,()\-.#&\/=?@%+]+)<\/a>', flags=re.A) +snappy_url_regex = re.compile('([\w:~,()\-.#&\/=?@%+]{5,250})<\/a>', flags=re.A) -email_regex = re.compile('([A-Za-z0-9]+[.-_])*[A-Za-z0-9]+@[A-Za-z0-9-]+(\.[A-Z|a-z]{2,})+', flags=re.A) +email_regex = re.compile('([A-Za-z0-9]+[.-_])*[A-Za-z0-9]+@[A-Za-z0-9-]+(\.[A-Z|a-z]{2,100})+', flags=re.A) -reddit_post_regex = re.compile('(https:\/\/old\.reddit\.com\/r\/\w{1,30}\/comments\/[a-z0-9]+)[\w\-.#&/=?@%+]+', flags=re.A) +reddit_post_regex = re.compile('(https:\/\/old\.reddit\.com\/r\/\w{1,30}\/comments\/[a-z0-9]+)[\w\-.#&/=?@%+]{5,250}', flags=re.A) utm_regex = re.compile('utm_[a-z]+=[a-z0-9_]+&', flags=re.A) utm_regex2 = re.compile('[?&]utm_[a-z]+=[a-z0-9_]+', flags=re.A) +yt_id_regex = re.compile('[A-Za-z0-9]{5,15}', flags=re.A) + slur_regex = re.compile(rf"((?<=\s|>)|^)({single_words})((?=[\s<,.$]|s[\s<,.$]))", flags=re.I|re.A) slur_regex_upper = re.compile(rf"((?<=\s|>)|^)({single_words.upper()})((?=[\s<,.$]|S[\s<,.$]))", flags=re.A) torture_regex = re.compile('(^|\s)(i|me) ', flags=re.I|re.A) @@ -744,3 +746,5 @@ def torture_ap(body, username): YOUTUBE_KEY = environ.get("YOUTUBE_KEY", "").strip() ADMIGGERS = (37696,37697,37749,37833,37838) + +proxies = {"http":"http://127.0.0.1:18080","https":"http://127.0.0.1:18080"} \ No newline at end of file diff --git a/files/helpers/sanitize.py b/files/helpers/sanitize.py index ad0de40286..001e366f3c 100644 --- a/files/helpers/sanitize.py +++ b/files/helpers/sanitize.py @@ -303,6 +303,8 @@ def sanitize(sanitized, noimages=False, alert=False, comment=False, edit=False): url = i.group(1) yt_id = i.group(2).split('&')[0].split('%')[0] + if not yt_id_regex.fullmatch(yt_id): continue + replacing = f'{url}' params = parse_qs(urlparse(url.replace('&','&')).query) diff --git a/files/routes/posts.py b/files/routes/posts.py index 179c458f78..50a1e2d419 100644 --- a/files/routes/posts.py +++ b/files/routes/posts.py @@ -612,7 +612,7 @@ def thumbnail_thread(pid): headers={"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.72 Safari/537.36"} try: - x=requests.get(fetch_url, headers=headers, timeout=5) + x=requests.get(fetch_url, headers=headers, timeout=5, proxies=proxies) except: db.close() return @@ -663,7 +663,7 @@ def thumbnail_thread(pid): for url in thumb_candidate_urls: try: - image_req=requests.get(url, headers=headers, timeout=5) + image_req=requests.get(url, headers=headers, timeout=5, proxies=proxies) except: continue @@ -918,18 +918,18 @@ def submit_post(v, sub=None): url = unquote(url).replace('?t', '&t') yt_id = url.split('https://youtube.com/watch?v=')[1].split('&')[0].split('%')[0] - req = requests.get(f"https://www.googleapis.com/youtube/v3/videos?id={yt_id}&key={YOUTUBE_KEY}&part=contentDetails", timeout=5).json() + if yt_id_regex.fullmatch(yt_id): + req = requests.get(f"https://www.googleapis.com/youtube/v3/videos?id={yt_id}&key={YOUTUBE_KEY}&part=contentDetails", timeout=5).json() + if req.get('items'): + params = parse_qs(urlparse(url).query) + t = params.get('t', params.get('start', [0]))[0] + if isinstance(t, str): t = t.replace('s','') - if req.get('items'): - params = parse_qs(urlparse(url).query) - t = params.get('t', params.get('start', [0]))[0] - if isinstance(t, str): t = t.replace('s','') - - embed = f'' + embed = f'' elif app.config['SERVER_NAME'] in domain and "/post/" in url and "context" not in url: id = url.split("/post/")[1] @@ -1469,7 +1469,7 @@ def get_post_title(v): url = request.values.get("url") if not url: abort(400) - try: x = requests.get(url, headers=titleheaders, timeout=5, proxies={"http":"http://127.0.0.1:18080","https":"http://127.0.0.1:18080"} ) + try: x = requests.get(url, headers=titleheaders, timeout=5, proxies=proxies) except: abort(400) soup = BeautifulSoup(x.content, 'lxml')