purge unviewable videos after 30 days

master
Aevann 2024-10-22 20:57:23 +03:00
parent 262adb84f7
commit 7bfd00ca0b
13 changed files with 202 additions and 10 deletions

View File

@ -237,6 +237,7 @@ class Comment(Base):
casino_game = relationship("CasinoGame")
wall_user = relationship("User", primaryjoin="User.id==Comment.wall_user_id")
edits = relationship("CommentEdit", order_by="CommentEdit.id.desc()")
media_usages = relationship("MediaUsage")
def __init__(self, *args, **kwargs):
if "created_utc" not in kwargs:

View File

@ -1,5 +1,6 @@
import time
from sqlalchemy import Column, ForeignKey
from sqlalchemy.orm import relationship
from sqlalchemy.sql.sqltypes import *
from flask import request, has_request_context
from files.classes import Base
@ -23,3 +24,23 @@ class Media(Base):
def __repr__(self):
return f"<{self.__class__.__name__}(kind={self.kind}, filename={self.filename})>"
class MediaUsage(Base):
    """ORM row of the ``media_usages`` table.

    Each row links one media file (by ``filename``) to the post or comment
    that references it, together with timestamps describing whether that
    reference is still live.
    """

    __tablename__ = "media_usages"

    id = Column(Integer, primary_key=True)
    # File being referenced; points at Media.filename.
    filename = Column(String, ForeignKey("media.filename"))
    # Referencing post / comment. NOTE(review): callers appear to set only one
    # of the two per row -- confirm no row is expected to carry both.
    post_id = Column(Integer, ForeignKey("posts.id"))
    comment_id = Column(Integer, ForeignKey("comments.id"))
    # Unix timestamps (seconds). created_utc is filled in by __init__ when the
    # caller does not supply it; the other two stay NULL until set elsewhere.
    created_utc = Column(Integer)
    deleted_utc = Column(Integer)
    removed_utc = Column(Integer)

    media = relationship("Media")

    def __init__(self, *args, **kwargs):
        """Create a usage row, defaulting ``created_utc`` to the current time."""
        kwargs.setdefault("created_utc", int(time.time()))
        super().__init__(*args, **kwargs)

    def __repr__(self):
        """Return a short debug representation containing the row id."""
        return f"<{type(self).__name__}(id={self.id})>"

View File

@ -87,6 +87,7 @@ class Post(Base):
hole_obj = relationship("Hole", primaryjoin="foreign(Post.hole)==remote(Hole.name)")
options = relationship("PostOption", order_by="PostOption.id")
edits = relationship("PostEdit", order_by="PostEdit.id.desc()")
media_usages = relationship("MediaUsage")
def __init__(self, *args, **kwargs):
if "created_utc" not in kwargs:

View File

@ -430,6 +430,11 @@ def execute_antispam_post_check(title, v, url):
for post in similar_posts + similar_urls:
post.is_banned = True
for media_usage in post.media_usages:
media_usage.removed_utc = time.time()
g.db.add(media_usage)
post.profile_pinned = False
post.ban_reason = "AutoJanny for spamming"
g.db.add(post)
@ -491,6 +496,11 @@ def execute_antispam_comment_check(body, v):
for comment in similar_comments:
comment.is_banned = True
for media_usage in comment.media_usages:
media_usage.removed_utc = time.time()
g.db.add(media_usage)
comment.ban_reason = "AutoJanny for spamming"
g.db.add(comment)
ma = ModAction(

View File

@ -12,6 +12,7 @@ from sqlalchemy import or_
import click
import requests
import humanize
from files.helpers.stats import *
from files.routes.front import frontlist
@ -422,3 +423,25 @@ def _set_top_poster_of_the_day_id():
def _cleanup_videos():
    """Run the video cleanup script, then report media that has no live usage.

    A file is reported when none of its usages is live (a live usage has both
    ``deleted_utc`` and ``removed_utc`` unset) and at least one of its usages
    was deleted or removed more than 30 days ago. Files are listed largest
    first, followed by the total size.

    NOTE(review): this function only prints the candidates; nothing in it
    deletes files or rows.
    """
    subprocess.call("scripts/cleanup_videos.sh", timeout=3000)

    db = db_session()
    try:
        # Filenames that still have at least one live (not deleted, not removed) usage.
        clean = [x[0] for x in db.query(MediaUsage.filename).filter_by(deleted_utc=None, removed_utc=None)]

        one_month_ago = time.time() - 2592000  # 30 days, in seconds

        # The join yields one row per usage, so a file referenced by several
        # dead usages would be printed and counted once per usage;
        # distinct() collapses those to a single (filename, size) row.
        to_delete = db.query(MediaUsage.filename, Media.size).join(MediaUsage.media).filter(
            MediaUsage.filename.notin_(clean),
            or_(
                MediaUsage.deleted_utc < one_month_ago,
                MediaUsage.removed_utc < one_month_ago,
            ),
        ).distinct().order_by(Media.size.desc())

        total_saved = 0
        for filename, size in to_delete:
            total_saved += size
            print(filename, humanize.naturalsize(size, binary=True), flush=True)

        print(f"Total saved: {humanize.naturalsize(total_saved, binary=True)}")
    finally:
        # Always release the session, even if the query or printing fails.
        db.close()

View File

@ -163,7 +163,7 @@ def reencode_video(old, new, check_sizes=False):
def process_video(file, v):
def process_video(file, v, post=None):
if isinstance(file, str):
old = file
else:
@ -224,6 +224,12 @@ def process_video(file, v):
)
g.db.add(media)
if post:
media_usage = MediaUsage(filename=new)
media_usage.post_id = post.id
g.db.add(media_usage)
url = SITE_FULL_VIDEOS + new.split('/videos')[1]
name = f'/images/{time.time()}'.replace('.','') + '.webp'

View File

@ -2,7 +2,7 @@ import random
import re
from flask import g
from files.classes.media import Media
from files.classes.media import *
from .config.const import *
NOT_IN_CODE_OR_LINKS = '(?!([^<]*<\/(code|pre|a)>|[^`\n]*`))'
@ -109,14 +109,30 @@ image_check_regex = re.compile(f'!\[\]\(((?!(https:\/\/({hosts})\/|\/)).*?)\)',
video_regex_extensions = '|'.join(VIDEO_FORMATS)
video_sub_regex = re.compile(f'(?<!")(https:\/\/({hosts})\/[\w:~,()\-.#&\/=?@%;+]*?\.({video_regex_extensions}))' + NOT_IN_CODE_OR_LINKS_OR_SPOILER, flags=re.A|re.I)
def video_sub_regex_matcher(match, obj=None):
    """Build the ``<video>`` HTML for one ``video_sub_regex`` match.

    For videos hosted under ``SITE_FULL_VIDEOS`` the matching ``Media`` row is
    looked up; when ``obj`` is given, a ``MediaUsage`` row linking the file to
    that post or comment is added unless one already exists. The poster image
    is included in the markup when the media row has one.

    Args:
        match: regex match whose group 1 is the video URL.
        obj: the post or comment being rendered, or ``None`` to skip usage
            tracking. It must already have an ``id``.

    Returns:
        The HTML snippet that replaces the matched URL.

    Raises:
        Exception: if ``obj`` is given but has no ``id`` yet.
    """
    url = match.group(1)

    if url.startswith(SITE_FULL_VIDEOS):
        filename = '/videos/' + url.split(f'{SITE_FULL_VIDEOS}/')[1]
        g.db.flush()
        media = g.db.get(Media, filename)
        if media:
            if obj:
                if not obj.id: raise Exception("The thing that never happens happened again")

                # Pick the owner column from the kind of object being rendered.
                if str(obj.__class__) == "<class 'files.classes.post.Post'>":
                    owner = {"post_id": obj.id}
                else:
                    owner = {"comment_id": obj.id}

                # first() rather than one_or_none(): a stray duplicate row
                # must not abort rendering.
                existing = g.db.query(MediaUsage.id).filter_by(filename=filename, **owner).first()
                if not existing:
                    g.db.add(MediaUsage(filename=filename, **owner))

            if media.posterurl:
                # media is a Media object, so posterurl is already the URL
                # string; indexing it with [0] would emit only its first character.
                return f'<p class="resizable"><video poster="{media.posterurl}" controls preload="none" src="{url}"></video></p>'

    return f'<p class="resizable"><video controls preload="none" src="{url}"></video></p>'
audio_regex_extensions = '|'.join(AUDIO_FORMATS)

View File

@ -487,7 +487,7 @@ def sanitize(sanitized, golden=True, limit_pings=0, showmore=False, count_emojis
sanitized = sanitized.replace('&amp;','&')
sanitized = video_sub_regex.sub(video_sub_regex_matcher, sanitized)
sanitized = video_sub_regex.sub(lambda match: video_sub_regex_matcher(match, obj), sanitized)
sanitized = audio_sub_regex.sub(r'<audio controls preload="none" src="\1"></audio>', sanitized)
if count_emojis:

View File

@ -245,6 +245,11 @@ def revert_actions(v, username):
for item in posts + comments:
item.is_banned = False
for media_usage in item.media_usages:
media_usage.removed_utc = None
g.db.add(media_usage)
item.ban_reason = None
item.is_approved = v.id
g.db.add(item)
@ -1377,6 +1382,11 @@ def unprogstack_comment(comment_id, v):
def remove_post(post_id, v):
post = get_post(post_id)
post.is_banned = True
for media_usage in post.media_usages:
media_usage.removed_utc = time.time()
g.db.add(media_usage)
post.is_approved = None
if not FEATURES['AWARDS'] or not post.pinned or not post.pinned.endswith(PIN_AWARD_TEXT):
@ -1423,6 +1433,11 @@ def approve_post(post_id, v):
g.db.add(ma)
post.is_banned = False
for media_usage in post.media_usages:
media_usage.removed_utc = None
g.db.add(media_usage)
post.ban_reason = None
post.is_approved = v.id
@ -1595,6 +1610,11 @@ def remove_comment(c_id, v):
comment = get_comment(c_id)
comment.is_banned = True
for media_usage in comment.media_usages:
media_usage.removed_utc = time.time()
g.db.add(media_usage)
comment.is_approved = None
comment.ban_reason = v.username
g.db.add(comment)
@ -1633,6 +1653,11 @@ def approve_comment(c_id, v):
g.db.add(ma)
comment.is_banned = False
for media_usage in comment.media_usages:
media_usage.removed_utc = None
g.db.add(media_usage)
comment.ban_reason = None
comment.is_approved = v.id
@ -1721,6 +1746,11 @@ def admin_nuke_user(v):
continue
post.is_banned = True
for media_usage in post.media_usages:
media_usage.removed_utc = time.time()
g.db.add(media_usage)
post.ban_reason = v.username
g.db.add(post)
@ -1729,6 +1759,11 @@ def admin_nuke_user(v):
continue
comment.is_banned = True
for media_usage in comment.media_usages:
media_usage.removed_utc = time.time()
g.db.add(media_usage)
comment.ban_reason = v.username
g.db.add(comment)
@ -1757,6 +1792,11 @@ def admin_nunuke_user(v):
continue
post.is_banned = False
for media_usage in post.media_usages:
media_usage.removed_utc = None
g.db.add(media_usage)
post.ban_reason = None
post.is_approved = v.id
g.db.add(post)
@ -1766,6 +1806,11 @@ def admin_nunuke_user(v):
continue
comment.is_banned = False
for media_usage in comment.media_usages:
media_usage.removed_utc = None
g.db.add(media_usage)
comment.ban_reason = None
comment.is_approved = v.id
g.db.add(comment)

View File

@ -232,6 +232,7 @@ def comment(v):
c.upvotes = 1
g.db.add(c)
body_html = sanitize(body, limit_pings=5, showmore=(not v.hieroglyphs), count_emojis=not v.marsify, commenters_ping_post_id=commenters_ping_post_id, obj=c, author=v)
if post_target.id not in ADMIGGER_THREADS and not (v.chud and v.chud_phrase.lower() in body.lower()):
@ -256,7 +257,6 @@ def comment(v):
c.body_html = body_html
g.db.add(c)
g.db.flush()
if not posting_to_post and v.admin_level >= PERMS['ADMIN_NOTES'] and request.values.get('admin_note') == 'true' :
@ -277,6 +277,11 @@ def comment(v):
if not complies_with_chud(c):
c.is_banned = True
for media_usage in c.media_usages:
media_usage.removed_utc = time.time()
g.db.add(media_usage)
c.ban_reason = "AutoJanny for lack of chud phrase"
g.db.add(c)
@ -425,6 +430,10 @@ def delete_comment(cid, v):
v.comment_count -= 1
g.db.add(v)
for media_usage in c.media_usages:
media_usage.deleted_utc = c.deleted_utc
g.db.add(media_usage)
cache.delete_memoized(comment_idlist)
if c.parent_post:
@ -459,6 +468,10 @@ def undelete_comment(cid, v):
v.comment_count += 1
g.db.add(v)
for media_usage in c.media_usages:
media_usage.deleted_utc = None
g.db.add(media_usage)
cache.delete_memoized(comment_idlist)
if c.parent_post:

View File

@ -641,6 +641,7 @@ def submit_post(v, hole=None):
p.title_html = title_html
g.db.add(p)
body_html = sanitize(body, count_emojis=True, limit_pings=100, obj=p, author=v)
if v.hieroglyphs and not p.distinguished and marseyaward_body_regex.search(body_html):
@ -684,7 +685,7 @@ def submit_post(v, hole=None):
copyfile(name, name2)
p.thumburl = process_image(name2, v, resize=199)
elif file.content_type.startswith('video/'):
p.url, p.posterurl, name = process_video(file, v)
p.url, p.posterurl, name = process_video(file, v, post=p)
if p.posterurl:
name2 = name.replace('.webp', 'r.webp')
copyfile(name, name2)
@ -693,9 +694,27 @@ def submit_post(v, hole=None):
p.url = process_audio(file, v)
else:
stop(415)
elif p.url and p.url.startswith(SITE_FULL_VIDEOS):
filename = p.url.split(SITE_FULL_VIDEOS)[0]
print(filename, flush=True)
media = g.db.get(Media, filename)
print(media, flush=True)
if media:
media_usage = MediaUsage(
filename=filename,
post_id=p.id,
)
g.db.add(media_usage)
if media.posterurl:
p.posterurl = media.posterurl
if not p.draft and not complies_with_chud(p):
p.is_banned = True
for media_usage in p.media_usages:
media_usage.removed_utc = time.time()
g.db.add(media_usage)
p.ban_reason = "AutoJanny for lack of chud phrase"
body = random.choice(CHUD_MSGS).format(username=v.username, type='post', CHUD_PHRASE=v.chud_phrase)
@ -802,6 +821,10 @@ def delete_post_pid(pid, v):
v.post_count -= 1
g.db.add(v)
for media_usage in p.media_usages:
media_usage.deleted_utc = p.deleted_utc
g.db.add(media_usage)
for sort in COMMENT_SORTS.keys():
cache.delete(f'post_{p.id}_{sort}')
@ -830,6 +853,10 @@ def undelete_post_pid(pid, v):
v.post_count += 1
g.db.add(v)
for media_usage in p.media_usages:
media_usage.deleted_utc = None
g.db.add(media_usage)
for sort in COMMENT_SORTS.keys():
cache.delete(f'post_{p.id}_{sort}')

View File

@ -0,0 +1,28 @@
-- Migration: create the media_usages table, one row per reference to a media
-- file from a post or a comment.
-- NOTE(review): the ORM model declares filename as a foreign key to
-- media.filename, but no such constraint is created here -- confirm whether
-- that is intentional.
create table media_usages (
id integer primary key,
filename character varying(55) NOT NULL,
post_id integer,
comment_id integer,
created_utc integer not null,
deleted_utc integer,
removed_utc integer
);
-- Sequence that supplies default values for media_usages.id.
CREATE SEQUENCE public.media_usages_id_seq
AS integer
START WITH 1
INCREMENT BY 1
NO MINVALUE
NO MAXVALUE
CACHE 1;
-- Tie the sequence to the id column and make it the column default.
ALTER SEQUENCE public.media_usages_id_seq OWNED BY public.media_usages.id;
ALTER TABLE ONLY public.media_usages ALTER COLUMN id SET DEFAULT nextval('public.media_usages_id_seq'::regclass);
-- Referential integrity towards the referencing post / comment.
alter table media_usages
add constraint media_usages_post_fkey foreign key (post_id) references posts(id);
alter table media_usages
add constraint media_usages_comment_fkey foreign key (comment_id) references comments(id);

View File

@ -13,6 +13,7 @@ gevent-websocket
gevent_inotifyx
greenlet
gunicorn
humanize
imagehash
inotify
isodate