forked from rDrama/rDrama
we don't need bloody bs4 to get a bloody title
parent
d35dd7617c
commit
f5912bb4cc
|
@ -116,6 +116,8 @@ pronouns_regex = re.compile("([a-z]{1,5})/[a-z]{1,5}(/[a-z]{1,5})?", flags=re.A|
|
|||
|
||||
# Matches a run of ASCII letters, digits, underscores and hyphens.
# Raw string so that `\-` reaches the regex engine verbatim instead of being an invalid string escape.
knowledgebase_page_regex = re.compile(r"[a-zA-Z0-9_\-]+", flags=re.A)
|
||||
|
||||
# Captures the text of an HTML <title> element (1 to 200 characters, case-insensitive tag).
# The quantifier is lazy so the capture stops at the first closing tag rather than
# running on to a later "</title>" on the same line.
# Without re.S, "." does not match newlines, so titles spanning several lines are not matched.
html_title_regex = re.compile(r"<title>(.{1,200}?)</title>", flags=re.A|re.I)
|
||||
|
||||
def sub_matcher(match:re.Match, upper=False, replace_with:Union[dict[str, str], dict[str, List[str]]]=SLURS):
|
||||
group_num = 0
|
||||
match_str = match.group(group_num)
|
||||
|
|
|
@ -1066,7 +1066,6 @@ extensions = IMAGE_FORMATS + VIDEO_FORMATS + AUDIO_FORMATS
|
|||
@limiter.limit("3/minute", key_func=lambda:f'{SITE}-{session.get("lo_user")}')
|
||||
@auth_required
|
||||
def get_post_title(v):
|
||||
|
||||
url = request.values.get("url")
|
||||
if not url or '\\' in url: abort(400)
|
||||
|
||||
|
@ -1080,9 +1079,11 @@ def get_post_title(v):
|
|||
content_type = x.headers.get("Content-Type")
|
||||
if not content_type or "text/html" not in content_type: abort(400)
|
||||
|
||||
soup = BeautifulSoup(x.content, 'lxml')
|
||||
# no you can't just parse html with reeeeeeeegex
|
||||
match = html_title_regex.match(x.content)
|
||||
if match and match.lastindex >= 1:
|
||||
title = match.group(1)
|
||||
|
||||
title = soup.find('title')
|
||||
if not title: abort(400)
|
||||
|
||||
return {"url": url, "title": title.string}
|
||||
|
|
Loading…
Reference in New Issue