diff --git a/files/helpers/regex.py b/files/helpers/regex.py
index 12bdc5b9b..458d7cabe 100644
--- a/files/helpers/regex.py
+++ b/files/helpers/regex.py
@@ -116,6 +116,8 @@ pronouns_regex = re.compile("([a-z]{1,5})/[a-z]{1,5}(/[a-z]{1,5})?", flags=re.A|
 
 knowledgebase_page_regex = re.compile("[a-zA-Z0-9_\-]+", flags=re.A)
 
+html_title_regex = re.compile("<title>(.{1,200})</title>", flags=re.A|re.I)
+
 def sub_matcher(match:re.Match, upper=False, replace_with:Union[dict[str, str], dict[str, List[str]]]=SLURS):
 	group_num = 0
 	match_str = match.group(group_num)
diff --git a/files/routes/posts.py b/files/routes/posts.py
index f0c52e90e..0f3aa3078 100644
--- a/files/routes/posts.py
+++ b/files/routes/posts.py
@@ -1066,7 +1066,6 @@ extensions = IMAGE_FORMATS + VIDEO_FORMATS + AUDIO_FORMATS
 @limiter.limit("3/minute", key_func=lambda:f'{SITE}-{session.get("lo_user")}')
 @auth_required
 def get_post_title(v):
-
 	url = request.values.get("url")
 	if not url or '\\' in url: abort(400)
 
@@ -1080,9 +1079,11 @@ def get_post_title(v):
 	content_type = x.headers.get("Content-Type")
 	if not content_type or "text/html" not in content_type:
 		abort(400)
-	soup = BeautifulSoup(x.content, 'lxml')
-
-	title = soup.find('title')
+	# no you can't just parse html with reeeeeeeegex
+	match = html_title_regex.match(x.content)
+	if match and match.lastindex >= 1:
+		title = match.group(1)
+
 	if not title:
 		abort(400)
 	return {"url": url, "title": title.string}
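
For reference, a minimal standalone sketch of the regex-based title extraction this diff switches to. The extract_title helper and the sample strings are illustrative only, not part of the codebase; the pattern mirrors html_title_regex from files/helpers/regex.py.

# Standalone sketch, not part of the commit: exercises the same
# "<title>(.{1,200})</title>" pattern that regex.py now exposes as
# html_title_regex. Two deliberate assumptions differ from the hunk in
# posts.py: re.search() is used instead of re.match() (a <title> tag is
# almost never at offset 0 of the document), and the input is decoded text
# rather than raw response bytes (a str pattern raises TypeError on bytes).
import re
from typing import Optional

html_title_regex = re.compile("<title>(.{1,200})</title>", flags=re.A|re.I)

def extract_title(html: str) -> Optional[str]:
	# Return the first <title> text (capped at 200 chars), or None if absent.
	match = html_title_regex.search(html)
	if match:
		return match.group(1)
	return None

# Hypothetical inputs, just to show the expected behaviour:
assert extract_title("<html><head><title>Example Domain</title></head></html>") == "Example Domain"
assert extract_title("<html><head></head><body>no title here</body></html>") is None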