we don't need bloody bs4 to get a bloody title

remotes/1693176582716663532/tmp_refs/heads/watchparty
justcool393 2022-11-11 03:24:54 -06:00
parent d35dd7617c
commit f5912bb4cc
2 changed files with 7 additions and 4 deletions

View File

@@ -116,6 +116,8 @@ pronouns_regex = re.compile("([a-z]{1,5})/[a-z]{1,5}(/[a-z]{1,5})?", flags=re.A|
knowledgebase_page_regex = re.compile("[a-zA-Z0-9_\-]+", flags=re.A)
html_title_regex = re.compile("<title>(.{1,200})</title>", flags=re.A|re.I)
def sub_matcher(match:re.Match, upper=False, replace_with:Union[dict[str, str], dict[str, List[str]]]=SLURS):
group_num = 0
match_str = match.group(group_num)

View File

@@ -1066,7 +1066,6 @@ extensions = IMAGE_FORMATS + VIDEO_FORMATS + AUDIO_FORMATS
@limiter.limit("3/minute", key_func=lambda:f'{SITE}-{session.get("lo_user")}')
@auth_required
def get_post_title(v):
url = request.values.get("url")
if not url or '\\' in url: abort(400)
@@ -1080,9 +1079,11 @@ def get_post_title(v):
content_type = x.headers.get("Content-Type")
if not content_type or "text/html" not in content_type: abort(400)
soup = BeautifulSoup(x.content, 'lxml')
title = soup.find('title')
# no you can't just parse html with reeeeeeeegex
match = html_title_regex.match(x.content)
if match and match.lastindex >= 1:
title = match.group(1)
if not title: abort(400)
return {"url": url, "title": title.string}