forked from rDrama/rDrama
we don't need bloody bs4 to get a bloody title
parent
d35dd7617c
commit
f5912bb4cc
|
@ -116,6 +116,8 @@ pronouns_regex = re.compile("([a-z]{1,5})/[a-z]{1,5}(/[a-z]{1,5})?", flags=re.A|
|
||||||
|
|
||||||
knowledgebase_page_regex = re.compile("[a-zA-Z0-9_\-]+", flags=re.A)
|
knowledgebase_page_regex = re.compile("[a-zA-Z0-9_\-]+", flags=re.A)
|
||||||
|
|
||||||
|
# Captures the text of the first <title> element (1-200 characters, case-insensitive tag).
# The quantifier is lazy so the capture stops at the FIRST closing </title>; a greedy
# quantifier would run on to a later </title> (e.g. one inside an inline <svg>) and
# return markup as part of the title.
html_title_regex = re.compile(r"<title>(.{1,200}?)</title>", flags=re.A|re.I)
|
||||||
|
|
||||||
def sub_matcher(match:re.Match, upper=False, replace_with:Union[dict[str, str], dict[str, List[str]]]=SLURS):
|
def sub_matcher(match:re.Match, upper=False, replace_with:Union[dict[str, str], dict[str, List[str]]]=SLURS):
|
||||||
group_num = 0
|
group_num = 0
|
||||||
match_str = match.group(group_num)
|
match_str = match.group(group_num)
|
||||||
|
|
|
@ -1066,7 +1066,6 @@ extensions = IMAGE_FORMATS + VIDEO_FORMATS + AUDIO_FORMATS
|
||||||
@limiter.limit("3/minute", key_func=lambda:f'{SITE}-{session.get("lo_user")}')
|
@limiter.limit("3/minute", key_func=lambda:f'{SITE}-{session.get("lo_user")}')
|
||||||
@auth_required
|
@auth_required
|
||||||
def get_post_title(v):
|
def get_post_title(v):
|
||||||
|
|
||||||
url = request.values.get("url")
|
url = request.values.get("url")
|
||||||
if not url or '\\' in url: abort(400)
|
if not url or '\\' in url: abort(400)
|
||||||
|
|
||||||
|
@ -1080,9 +1079,11 @@ def get_post_title(v):
|
||||||
content_type = x.headers.get("Content-Type")
|
content_type = x.headers.get("Content-Type")
|
||||||
if not content_type or "text/html" not in content_type: abort(400)
|
if not content_type or "text/html" not in content_type: abort(400)
|
||||||
|
|
||||||
soup = BeautifulSoup(x.content, 'lxml')
|
# no you can't just parse html with reeeeeeeegex
|
||||||
|
match = html_title_regex.match(x.content)
|
||||||
title = soup.find('title')
|
if match and match.lastindex >= 1:
|
||||||
|
title = match.group(1)
|
||||||
|
|
||||||
if not title: abort(400)
|
if not title: abort(400)
|
||||||
|
|
||||||
return {"url": url, "title": title.string}
|
return {"url": url, "title": title.string}
|
||||||
|
|
Loading…
Reference in New Issue