prevent retards from doxing themselves through tiktok links - post edition
parent
d1108f1fad
commit
1fd6295e8c
|
@ -172,7 +172,7 @@ def process_video(file, v):
|
|||
else:
|
||||
return f"{SITE_FULL}{new}"
|
||||
|
||||
def process_image(filename, v, resize=0, trim=False, uploader_id=None, db=None):
|
||||
def process_image(filename, v, resize=0, trim=False, uploader_id=None):
|
||||
# thumbnails are processed in a thread and not in the request context
|
||||
# if an image is too large or webp conversion fails, it'll crash
|
||||
# to avoid this, we'll simply return None instead
|
||||
|
@ -248,10 +248,8 @@ def process_image(filename, v, resize=0, trim=False, uploader_id=None, db=None):
|
|||
os.remove(filename)
|
||||
return None
|
||||
|
||||
db = db or g.db
|
||||
|
||||
media = db.query(Media).filter_by(filename=filename, kind='image').one_or_none()
|
||||
if media: db.delete(media)
|
||||
media = g.db.query(Media).filter_by(filename=filename, kind='image').one_or_none()
|
||||
if media: g.db.delete(media)
|
||||
|
||||
media = Media(
|
||||
kind='image',
|
||||
|
@ -259,7 +257,7 @@ def process_image(filename, v, resize=0, trim=False, uploader_id=None, db=None):
|
|||
user_id=uploader_id or v.id,
|
||||
size=os.stat(filename).st_size
|
||||
)
|
||||
db.add(media)
|
||||
g.db.add(media)
|
||||
|
||||
if SITE == 'watchpeopledie.tv' and v and "dylan" in v.username.lower() and "hewitt" in v.username.lower():
|
||||
gevent.spawn(delete_file, filename)
|
||||
|
|
|
@ -187,6 +187,7 @@ reddit_comment_link_regex = re.compile("https:\/\/old.reddit.com\/r\/\w{2,25}\/c
|
|||
|
||||
#gevent
|
||||
reddit_s_url_regex = re.compile("https:\/\/reddit.com\/[ru]\/\w{2,25}\/s\/\w{10}", flags=re.A)
|
||||
tiktok_t_url_regex = re.compile("https:\/\/tiktok.com\/t\/\w{9}", flags=re.A)
|
||||
|
||||
#run-time
|
||||
reddit_to_vreddit_regex = re.compile('(^|>|")https:\/\/old.reddit.com\/([ru])\/', flags=re.A)
|
||||
|
|
|
@ -423,7 +423,9 @@ def sanitize(sanitized, golden=True, limit_pings=0, showmore=False, count_emojis
|
|||
|
||||
if not sanitized: return ''
|
||||
|
||||
if blackjack and execute_blackjack(g.v, None, sanitized, blackjack):
|
||||
v = getattr(g, 'v', None)
|
||||
|
||||
if blackjack and execute_blackjack(v, None, sanitized, blackjack):
|
||||
sanitized = 'g'
|
||||
|
||||
if '```' not in sanitized and '<pre>' not in sanitized:
|
||||
|
@ -450,8 +452,6 @@ def sanitize(sanitized, golden=True, limit_pings=0, showmore=False, count_emojis
|
|||
sanitized = reddit_mention_regex.sub(r'<a href="https://old.reddit.com/\1" rel="nofollow noopener" target="_blank">/\1</a>', sanitized)
|
||||
sanitized = hole_mention_regex.sub(r'<a href="/\1">/\1</a>', sanitized)
|
||||
|
||||
v = getattr(g, 'v', None)
|
||||
|
||||
names = set(m.group(1) for m in mention_regex.finditer(sanitized))
|
||||
|
||||
if limit_pings and len(names) > limit_pings and v.admin_level < PERMS['POST_COMMENT_INFINITE_PINGS']:
|
||||
|
@ -485,7 +485,7 @@ def sanitize(sanitized, golden=True, limit_pings=0, showmore=False, count_emojis
|
|||
elif name == 'commenters' and commenters_ping_post_id:
|
||||
return f'<a href="/!commenters/{commenters_ping_post_id}/{int(time.time())}">!{name}</a>'
|
||||
elif name == 'followers':
|
||||
return f'<a href="/id/{g.v.id}/followers">!{name}</a>'
|
||||
return f'<a href="/id/{v.id}/followers">!{name}</a>'
|
||||
elif g.db.get(Group, name):
|
||||
return f'<a href="/!{name}">!{name}</a>'
|
||||
else:
|
||||
|
@ -572,7 +572,7 @@ def sanitize(sanitized, golden=True, limit_pings=0, showmore=False, count_emojis
|
|||
sanitized = sanitized.replace('<p></p>', '')
|
||||
|
||||
allowed_css_properties = allowed_styles.copy()
|
||||
if g.v and g.v.chud:
|
||||
if v and v.chud:
|
||||
allowed_css_properties.remove('filter')
|
||||
|
||||
css_sanitizer = CSSSanitizer(allowed_css_properties=allowed_css_properties)
|
||||
|
@ -591,7 +591,7 @@ def sanitize(sanitized, golden=True, limit_pings=0, showmore=False, count_emojis
|
|||
|
||||
links = soup.find_all("a")
|
||||
|
||||
if g.v and g.v.admin_level >= PERMS["IGNORE_DOMAIN_BAN"]:
|
||||
if v and v.admin_level >= PERMS["IGNORE_DOMAIN_BAN"]:
|
||||
banned_domains = []
|
||||
else:
|
||||
banned_domains = [x.domain for x in g.db.query(BannedDomain.domain)]
|
||||
|
|
|
@ -287,118 +287,119 @@ def expand_url(post_url, fragment_url):
|
|||
return f"{post_url}/{fragment_url}"
|
||||
|
||||
|
||||
def reddit_s_url_cleaner(url):
|
||||
return normalize_url(requests.get(url, headers=HEADERS, timeout=2, proxies=proxies).url)
|
||||
def cancer_url_cleaner(url):
|
||||
try: url = requests.get(url, headers=HEADERS, timeout=2, proxies=proxies).url
|
||||
except: return url
|
||||
return normalize_url(url)
|
||||
|
||||
def surl_and_thumbnail_thread(post_url, post_body, post_body_html, pid, generate_thumb):
|
||||
#s_url
|
||||
dirty = False
|
||||
def postprocess_post(post_url, post_body, post_body_html, pid, generate_thumb, edit):
|
||||
with app.app_context():
|
||||
if post_url and (reddit_s_url_regex.fullmatch(post_url) or tiktok_t_url_regex.fullmatch(post_url)):
|
||||
post_url = cancer_url_cleaner(post_url)
|
||||
|
||||
if post_url and reddit_s_url_regex.fullmatch(post_url):
|
||||
post_url = reddit_s_url_cleaner(post_url)
|
||||
dirty = True
|
||||
if post_body:
|
||||
li = list(reddit_s_url_regex.finditer(post_body)) + list(tiktok_t_url_regex.finditer(post_body))
|
||||
for i in li:
|
||||
old = i.group(0)
|
||||
new = cancer_url_cleaner(old)
|
||||
post_body = post_body.replace(old, new)
|
||||
post_body_html = post_body_html.replace(old, new)
|
||||
|
||||
if post_body:
|
||||
for i in reddit_s_url_regex.finditer(post_body):
|
||||
old = i.group(0)
|
||||
new = reddit_s_url_cleaner(old)
|
||||
post_body = post_body.replace(old, new)
|
||||
post_body_html = post_body_html.replace(old, new)
|
||||
dirty = True
|
||||
g.db = db_session()
|
||||
|
||||
if dirty:
|
||||
db = db_session()
|
||||
|
||||
p = db.query(Post).filter_by(id=pid).options(load_only(Post.id)).one_or_none()
|
||||
p = g.db.query(Post).filter_by(id=pid).options(load_only(Post.id)).one_or_none()
|
||||
p.url = post_url
|
||||
p.body = post_body
|
||||
p.body_html = post_body_html
|
||||
g.db.add(p)
|
||||
|
||||
db.add(p)
|
||||
db.commit()
|
||||
db.close()
|
||||
if not p.private and not edit:
|
||||
execute_snappy(p, p.author)
|
||||
|
||||
stdout.flush()
|
||||
g.db.commit()
|
||||
g.db.close()
|
||||
|
||||
stdout.flush()
|
||||
|
||||
|
||||
#thumbnail
|
||||
if not generate_thumb: return
|
||||
#thumbnail
|
||||
if not generate_thumb: return
|
||||
|
||||
if post_url.startswith('/') and '\\' not in post_url:
|
||||
post_url = f"{SITE_FULL}{post_url}"
|
||||
if post_url.startswith('/') and '\\' not in post_url:
|
||||
post_url = f"{SITE_FULL}{post_url}"
|
||||
|
||||
try:
|
||||
x = requests.get(post_url, headers=HEADERS, timeout=5, proxies=proxies)
|
||||
except:
|
||||
return
|
||||
try:
|
||||
x = requests.get(post_url, headers=HEADERS, timeout=5, proxies=proxies)
|
||||
except:
|
||||
return
|
||||
|
||||
if x.status_code != 200:
|
||||
return
|
||||
if x.status_code != 200:
|
||||
return
|
||||
|
||||
if x.headers.get("Content-Type","").startswith("text/html"):
|
||||
soup = BeautifulSoup(x.content, 'lxml')
|
||||
if x.headers.get("Content-Type","").startswith("text/html"):
|
||||
soup = BeautifulSoup(x.content, 'lxml')
|
||||
|
||||
thumb_candidate_urls = []
|
||||
thumb_candidate_urls = []
|
||||
|
||||
for tag_name in ("twitter:image", "og:image", "thumbnail"):
|
||||
tag = soup.find('meta', attrs={"name": tag_name, "content": True})
|
||||
if not tag:
|
||||
tag = soup.find('meta', attrs={"property": tag_name, "content": True})
|
||||
if tag:
|
||||
thumb_candidate_urls.append(expand_url(post_url, tag['content']))
|
||||
for tag_name in ("twitter:image", "og:image", "thumbnail"):
|
||||
tag = soup.find('meta', attrs={"name": tag_name, "content": True})
|
||||
if not tag:
|
||||
tag = soup.find('meta', attrs={"property": tag_name, "content": True})
|
||||
if tag:
|
||||
thumb_candidate_urls.append(expand_url(post_url, tag['content']))
|
||||
|
||||
for tag in soup.find_all("img", attrs={'src': True}):
|
||||
thumb_candidate_urls.append(expand_url(post_url, tag['src']))
|
||||
for tag in soup.find_all("img", attrs={'src': True}):
|
||||
thumb_candidate_urls.append(expand_url(post_url, tag['src']))
|
||||
|
||||
for url in thumb_candidate_urls:
|
||||
try:
|
||||
image_req = requests.get(url, headers=HEADERS, timeout=5, proxies=proxies)
|
||||
except:
|
||||
continue
|
||||
|
||||
if image_req.status_code >= 400:
|
||||
continue
|
||||
|
||||
if not image_req.headers.get("Content-Type","").startswith("image/"):
|
||||
continue
|
||||
|
||||
if image_req.headers.get("Content-Type","").startswith("image/svg"):
|
||||
continue
|
||||
|
||||
with Image.open(BytesIO(image_req.content)) as i:
|
||||
if i.width < 30 or i.height < 30:
|
||||
for url in thumb_candidate_urls:
|
||||
try:
|
||||
image_req = requests.get(url, headers=HEADERS, timeout=5, proxies=proxies)
|
||||
except:
|
||||
continue
|
||||
break
|
||||
|
||||
if image_req.status_code >= 400:
|
||||
continue
|
||||
|
||||
if not image_req.headers.get("Content-Type","").startswith("image/"):
|
||||
continue
|
||||
|
||||
if image_req.headers.get("Content-Type","").startswith("image/svg"):
|
||||
continue
|
||||
|
||||
with Image.open(BytesIO(image_req.content)) as i:
|
||||
if i.width < 30 or i.height < 30:
|
||||
continue
|
||||
break
|
||||
else:
|
||||
return
|
||||
elif x.headers.get("Content-Type","").startswith("image/"):
|
||||
image_req = x
|
||||
with Image.open(BytesIO(x.content)) as i:
|
||||
size = len(i.fp.read())
|
||||
if size > 8 * 1024 * 1024:
|
||||
return
|
||||
else:
|
||||
return
|
||||
elif x.headers.get("Content-Type","").startswith("image/"):
|
||||
image_req = x
|
||||
with Image.open(BytesIO(x.content)) as i:
|
||||
size = len(i.fp.read())
|
||||
if size > 8 * 1024 * 1024:
|
||||
return
|
||||
else:
|
||||
return
|
||||
|
||||
name = f'/images/{time.time()}'.replace('.','') + '.webp'
|
||||
name = f'/images/{time.time()}'.replace('.','') + '.webp'
|
||||
|
||||
with open(name, "wb") as file:
|
||||
for chunk in image_req.iter_content(1024):
|
||||
file.write(chunk)
|
||||
with open(name, "wb") as file:
|
||||
for chunk in image_req.iter_content(1024):
|
||||
file.write(chunk)
|
||||
|
||||
db = db_session()
|
||||
g.db = db_session()
|
||||
|
||||
p = db.query(Post).filter_by(id=pid).options(load_only(Post.author_id)).one_or_none()
|
||||
p = g.db.query(Post).filter_by(id=pid).options(load_only(Post.author_id)).one_or_none()
|
||||
|
||||
thumburl = process_image(name, None, resize=99, uploader_id=p.author_id, db=db)
|
||||
thumburl = process_image(name, None, resize=99, uploader_id=p.author_id)
|
||||
|
||||
if thumburl:
|
||||
p.thumburl = thumburl
|
||||
db.add(p)
|
||||
if thumburl:
|
||||
p.thumburl = thumburl
|
||||
g.db.add(p)
|
||||
|
||||
db.commit()
|
||||
db.close()
|
||||
stdout.flush()
|
||||
g.db.commit()
|
||||
g.db.close()
|
||||
stdout.flush()
|
||||
|
||||
|
||||
@app.post("/is_repost")
|
||||
|
@ -698,13 +699,10 @@ def submit_post(v, sub=None):
|
|||
cache.delete_memoized(frontlist)
|
||||
cache.delete_memoized(userpagelisting)
|
||||
|
||||
if not p.private:
|
||||
execute_snappy(p, v)
|
||||
|
||||
g.db.flush() #Necessary, do NOT remove
|
||||
|
||||
generate_thumb = (not p.thumburl and p.url and p.domain != SITE)
|
||||
gevent.spawn(surl_and_thumbnail_thread, p.url, p.body, p.body_html, p.id, generate_thumb)
|
||||
gevent.spawn(postprocess_post, p.url, p.body, p.body_html, p.id, generate_thumb, False)
|
||||
|
||||
if v.client: return p.json
|
||||
else:
|
||||
|
@ -1054,7 +1052,7 @@ def edit_post(pid, v):
|
|||
|
||||
process_poll_options(v, p)
|
||||
|
||||
gevent.spawn(surl_and_thumbnail_thread, p.url, p.body, p.body_html, p.id, False)
|
||||
gevent.spawn(postprocess_post, p.url, p.body, p.body_html, p.id, False, True)
|
||||
|
||||
|
||||
if not complies_with_chud(p):
|
||||
|
|
Loading…
Reference in New Issue