only attempt to parse HTML content types for titles (#382)

* only attempt to parse HTML content types for titles

also don't try to get submission titles for .gifv, .tif, .tiff

* ratelimit to 3 per minute instead of 6 per minute

no one will ever need more than 3 requests to this endpoint per minute - justcool393

6 per minute is already kind of a lot for this endpoint; I think aggressively ratelimiting this one is fine, especially since it's a per-minute ratelimit
master
justcool393 2022-09-30 05:13:06 -07:00 committed by GitHub
parent 0617bb154c
commit c1ca1a02ac
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed file with 6 additions and 3 deletions

View File

@@ -1197,14 +1197,14 @@ def pin_post(post_id, v):
extensions = ( extensions = (
'.webp','.jpg','.png','.jpeg','.gif', '.webp','.jpg','.png','.jpeg','.gif','.gifv','.tif', '.tiff',
'.mp4','.webm','.mov', '.mp4','.webm','.mov',
'.mp3','.wav','.ogg','.aac','.m4a','.flac' '.mp3','.wav','.ogg','.aac','.m4a','.flac'
) )
@app.get("/submit/title") @app.get("/submit/title")
@limiter.limit("6/minute") @limiter.limit("3/minute")
@limiter.limit("6/minute", key_func=lambda:f'{SITE}-{session.get("lo_user")}') @limiter.limit("3/minute", key_func=lambda:f'{SITE}-{session.get("lo_user")}')
@auth_required @auth_required
def get_post_title(v): def get_post_title(v):
@@ -1218,6 +1218,9 @@ def get_post_title(v):
try: x = requests.get(url, headers=titleheaders, timeout=5, proxies=proxies) try: x = requests.get(url, headers=titleheaders, timeout=5, proxies=proxies)
except: abort(400) except: abort(400)
content_type = x.headers.get("Content-Type")
if not content_type or "text/html" not in content_type: abort(400)
soup = BeautifulSoup(x.content, 'lxml') soup = BeautifulSoup(x.content, 'lxml')
title = soup.find('title') title = soup.find('title')