diff --git a/files/classes/comment.py b/files/classes/comment.py
index b5df483b7..799b59cc4 100644
--- a/files/classes/comment.py
+++ b/files/classes/comment.py
@@ -237,6 +237,7 @@ class Comment(Base):
casino_game = relationship("CasinoGame")
wall_user = relationship("User", primaryjoin="User.id==Comment.wall_user_id")
edits = relationship("CommentEdit", order_by="CommentEdit.id.desc()")
+ media_usages = relationship("MediaUsage")
def __init__(self, *args, **kwargs):
if "created_utc" not in kwargs:
diff --git a/files/classes/media.py b/files/classes/media.py
index 40b0e8494..0c0366c23 100644
--- a/files/classes/media.py
+++ b/files/classes/media.py
@@ -1,5 +1,6 @@
import time
from sqlalchemy import Column, ForeignKey
+from sqlalchemy.orm import relationship
from sqlalchemy.sql.sqltypes import *
from flask import request, has_request_context
from files.classes import Base
@@ -23,3 +24,34 @@ class Media(Base):
def __repr__(self):
return f"<{self.__class__.__name__}(kind={self.kind}, filename={self.filename})>"
+
+class MediaUsage(Base):
+    """Record that a media file is referenced by one post or one comment.
+
+    ``deleted_utc`` and ``removed_utc`` start out unset; code elsewhere in
+    this change sets and clears them when the owning post/comment is
+    deleted or removed, and the cron cleanup report filters on them.
+    """
+
+    __tablename__ = "media_usages"
+
+    id = Column(Integer, primary_key=True)
+    # nullable=False mirrors the migration in this change, which declares the column NOT NULL.
+    filename = Column(String, ForeignKey("media.filename"), nullable=False)
+    # Owning content; each call site in this change sets one of the two.
+    post_id = Column(Integer, ForeignKey("posts.id"))
+    comment_id = Column(Integer, ForeignKey("comments.id"))
+    # Unix timestamps stored in Integer columns; created_utc is NOT NULL in the migration.
+    created_utc = Column(Integer, nullable=False)
+    deleted_utc = Column(Integer)
+    removed_utc = Column(Integer)
+
+    media = relationship("Media")
+
+    def __init__(self, *args, **kwargs):
+        """Create a usage row, stamping ``created_utc`` with the current time when not given."""
+        kwargs.setdefault("created_utc", int(time.time()))
+        super().__init__(*args, **kwargs)
+
+    def __repr__(self):
+        return f"<{self.__class__.__name__}(id={self.id})>"
diff --git a/files/classes/post.py b/files/classes/post.py
index ac7d8e3cb..3d127ca91 100644
--- a/files/classes/post.py
+++ b/files/classes/post.py
@@ -87,6 +87,7 @@ class Post(Base):
hole_obj = relationship("Hole", primaryjoin="foreign(Post.hole)==remote(Hole.name)")
options = relationship("PostOption", order_by="PostOption.id")
edits = relationship("PostEdit", order_by="PostEdit.id.desc()")
+ media_usages = relationship("MediaUsage")
def __init__(self, *args, **kwargs):
if "created_utc" not in kwargs:
diff --git a/files/helpers/actions.py b/files/helpers/actions.py
index ae5dbb3a6..8b86e2966 100644
--- a/files/helpers/actions.py
+++ b/files/helpers/actions.py
@@ -430,6 +430,11 @@ def execute_antispam_post_check(title, v, url):
for post in similar_posts + similar_urls:
post.is_banned = True
+
+ for media_usage in post.media_usages:
+ media_usage.removed_utc = time.time()
+ g.db.add(media_usage)
+
post.profile_pinned = False
post.ban_reason = "AutoJanny for spamming"
g.db.add(post)
@@ -491,6 +496,11 @@ def execute_antispam_comment_check(body, v):
for comment in similar_comments:
comment.is_banned = True
+
+ for media_usage in comment.media_usages:
+ media_usage.removed_utc = time.time()
+ g.db.add(media_usage)
+
comment.ban_reason = "AutoJanny for spamming"
g.db.add(comment)
ma = ModAction(
diff --git a/files/helpers/cron.py b/files/helpers/cron.py
index a7b67889c..5bcae1cd7 100644
--- a/files/helpers/cron.py
+++ b/files/helpers/cron.py
@@ -12,6 +12,7 @@ from sqlalchemy import or_
import click
import requests
+import humanize
from files.helpers.stats import *
from files.routes.front import frontlist
@@ -421,4 +422,39 @@ def _set_top_poster_of_the_day_id():
cache.set("top_poster_of_the_day_id", user.id, timeout=86400)
def _cleanup_videos():
- subprocess.call("scripts/cleanup_videos.sh", timeout=3000)
\ No newline at end of file
+    """Run the video cleanup script, then report media files that are no longer in use.
+
+    A file is reported when none of its usage rows is still live (neither deleted
+    nor removed) and at least one usage was deleted or removed more than 30 days
+    ago. The Python code here only prints each such file with its size, followed by
+    the total; it does not delete any file or row itself.
+    """
+    subprocess.call("scripts/cleanup_videos.sh", timeout=3000)
+
+    db = db_session()
+    try:
+        # Filenames with at least one live usage must be kept.
+        clean = [x[0] for x in db.query(MediaUsage.filename).filter_by(deleted_utc=None, removed_utc=None)]
+
+        one_month_ago = time.time() - 2592000  # 30 days, in seconds
+
+        # distinct(): a file with several stale usages would otherwise be listed
+        # once per usage and have its size added to the total each time.
+        to_delete = db.query(MediaUsage.filename, Media.size).join(MediaUsage.media).filter(
+            MediaUsage.filename.notin_(clean),
+            or_(
+                MediaUsage.deleted_utc < one_month_ago,
+                MediaUsage.removed_utc < one_month_ago,
+            ),
+        ).distinct().order_by(Media.size.desc())
+
+        total_saved = 0
+        for filename, size in to_delete:
+            total_saved += size
+            print(filename, humanize.naturalsize(size, binary=True), flush=True)
+
+        total_saved = humanize.naturalsize(total_saved, binary=True)
+        print(f"Total saved: {total_saved}")
+    finally:
+        # This function opened the session, so release it even if a query fails.
+        db.close()
\ No newline at end of file
diff --git a/files/helpers/media.py b/files/helpers/media.py
index 7c5c824a4..9f99d8816 100644
--- a/files/helpers/media.py
+++ b/files/helpers/media.py
@@ -163,7 +163,7 @@ def reencode_video(old, new, check_sizes=False):
-def process_video(file, v):
+def process_video(file, v, post=None):
if isinstance(file, str):
old = file
else:
@@ -224,6 +224,12 @@ def process_video(file, v):
)
g.db.add(media)
+ if post:
+ media_usage = MediaUsage(filename=new)
+ media_usage.post_id = post.id
+ g.db.add(media_usage)
+
+
url = SITE_FULL_VIDEOS + new.split('/videos')[1]
name = f'/images/{time.time()}'.replace('.','') + '.webp'
diff --git a/files/helpers/regex.py b/files/helpers/regex.py
index ef8fe1b87..6a9c20303 100644
--- a/files/helpers/regex.py
+++ b/files/helpers/regex.py
@@ -2,7 +2,7 @@ import random
import re
from flask import g
-from files.classes.media import Media
+from files.classes.media import *
from .config.const import *
NOT_IN_CODE_OR_LINKS = '(?!([^<]*<\/(code|pre|a)>|[^`\n]*`))'
@@ -109,14 +109,30 @@ image_check_regex = re.compile(f'!\[\]\(((?!(https:\/\/({hosts})\/|\/)).*?)\)',
video_regex_extensions = '|'.join(VIDEO_FORMATS)
video_sub_regex = re.compile(f'(?
'
+ media = g.db.get(Media, filename)
+ if media:
+ if obj:
+ if not obj.id: raise Exception("The thing that never happens happened again")
+ if str(obj.__class__) == "":
+ existing = g.db.query(MediaUsage.id).filter_by(filename=filename, post_id=obj.id).one_or_none()
+ if not existing:
+ media_usage = MediaUsage(filename=filename)
+ media_usage.post_id = obj.id
+ g.db.add(media_usage)
+ else:
+ existing = g.db.query(MediaUsage.id).filter_by(filename=filename, comment_id=obj.id).one_or_none()
+ if not existing:
+ media_usage = MediaUsage(filename=filename)
+ media_usage.comment_id = obj.id
+ g.db.add(media_usage)
+
+ if media.posterurl:
+ return f''
return f''
audio_regex_extensions = '|'.join(AUDIO_FORMATS)
diff --git a/files/helpers/sanitize.py b/files/helpers/sanitize.py
index 339ec435c..e81fab1fe 100644
--- a/files/helpers/sanitize.py
+++ b/files/helpers/sanitize.py
@@ -487,7 +487,7 @@ def sanitize(sanitized, golden=True, limit_pings=0, showmore=False, count_emojis
sanitized = sanitized.replace('&','&')
- sanitized = video_sub_regex.sub(video_sub_regex_matcher, sanitized)
+ sanitized = video_sub_regex.sub(lambda match: video_sub_regex_matcher(match, obj), sanitized)
sanitized = audio_sub_regex.sub(r'', sanitized)
if count_emojis:
diff --git a/files/routes/admin.py b/files/routes/admin.py
index 2241a8727..c6d1d492e 100644
--- a/files/routes/admin.py
+++ b/files/routes/admin.py
@@ -245,6 +245,11 @@ def revert_actions(v, username):
for item in posts + comments:
item.is_banned = False
+
+ for media_usage in item.media_usages:
+ media_usage.removed_utc = None
+ g.db.add(media_usage)
+
item.ban_reason = None
item.is_approved = v.id
g.db.add(item)
@@ -1377,6 +1382,11 @@ def unprogstack_comment(comment_id, v):
def remove_post(post_id, v):
post = get_post(post_id)
post.is_banned = True
+
+ for media_usage in post.media_usages:
+ media_usage.removed_utc = time.time()
+ g.db.add(media_usage)
+
post.is_approved = None
if not FEATURES['AWARDS'] or not post.pinned or not post.pinned.endswith(PIN_AWARD_TEXT):
@@ -1423,6 +1433,11 @@ def approve_post(post_id, v):
g.db.add(ma)
post.is_banned = False
+
+ for media_usage in post.media_usages:
+ media_usage.removed_utc = None
+ g.db.add(media_usage)
+
post.ban_reason = None
post.is_approved = v.id
@@ -1595,6 +1610,11 @@ def remove_comment(c_id, v):
comment = get_comment(c_id)
comment.is_banned = True
+
+ for media_usage in comment.media_usages:
+ media_usage.removed_utc = time.time()
+ g.db.add(media_usage)
+
comment.is_approved = None
comment.ban_reason = v.username
g.db.add(comment)
@@ -1633,6 +1653,11 @@ def approve_comment(c_id, v):
g.db.add(ma)
comment.is_banned = False
+
+ for media_usage in comment.media_usages:
+ media_usage.removed_utc = None
+ g.db.add(media_usage)
+
comment.ban_reason = None
comment.is_approved = v.id
@@ -1721,6 +1746,11 @@ def admin_nuke_user(v):
continue
post.is_banned = True
+
+ for media_usage in post.media_usages:
+ media_usage.removed_utc = time.time()
+ g.db.add(media_usage)
+
post.ban_reason = v.username
g.db.add(post)
@@ -1729,6 +1759,11 @@ def admin_nuke_user(v):
continue
comment.is_banned = True
+
+ for media_usage in comment.media_usages:
+ media_usage.removed_utc = time.time()
+ g.db.add(media_usage)
+
comment.ban_reason = v.username
g.db.add(comment)
@@ -1757,6 +1792,11 @@ def admin_nunuke_user(v):
continue
post.is_banned = False
+
+ for media_usage in post.media_usages:
+ media_usage.removed_utc = None
+ g.db.add(media_usage)
+
post.ban_reason = None
post.is_approved = v.id
g.db.add(post)
@@ -1766,6 +1806,11 @@ def admin_nunuke_user(v):
continue
comment.is_banned = False
+
+ for media_usage in comment.media_usages:
+ media_usage.removed_utc = None
+ g.db.add(media_usage)
+
comment.ban_reason = None
comment.is_approved = v.id
g.db.add(comment)
diff --git a/files/routes/comments.py b/files/routes/comments.py
index a1b5cce04..dba1283a5 100644
--- a/files/routes/comments.py
+++ b/files/routes/comments.py
@@ -232,6 +232,7 @@ def comment(v):
c.upvotes = 1
+ g.db.add(c)
body_html = sanitize(body, limit_pings=5, showmore=(not v.hieroglyphs), count_emojis=not v.marsify, commenters_ping_post_id=commenters_ping_post_id, obj=c, author=v)
if post_target.id not in ADMIGGER_THREADS and not (v.chud and v.chud_phrase.lower() in body.lower()):
@@ -256,7 +257,6 @@ def comment(v):
c.body_html = body_html
- g.db.add(c)
g.db.flush()
if not posting_to_post and v.admin_level >= PERMS['ADMIN_NOTES'] and request.values.get('admin_note') == 'true' :
@@ -277,6 +277,11 @@ def comment(v):
if not complies_with_chud(c):
c.is_banned = True
+
+ for media_usage in c.media_usages:
+ media_usage.removed_utc = time.time()
+ g.db.add(media_usage)
+
c.ban_reason = "AutoJanny for lack of chud phrase"
g.db.add(c)
@@ -425,6 +430,10 @@ def delete_comment(cid, v):
v.comment_count -= 1
g.db.add(v)
+ for media_usage in c.media_usages:
+ media_usage.deleted_utc = c.deleted_utc
+ g.db.add(media_usage)
+
cache.delete_memoized(comment_idlist)
if c.parent_post:
@@ -459,6 +468,10 @@ def undelete_comment(cid, v):
v.comment_count += 1
g.db.add(v)
+ for media_usage in c.media_usages:
+ media_usage.deleted_utc = None
+ g.db.add(media_usage)
+
cache.delete_memoized(comment_idlist)
if c.parent_post:
diff --git a/files/routes/posts.py b/files/routes/posts.py
index beffb2822..e42e10152 100644
--- a/files/routes/posts.py
+++ b/files/routes/posts.py
@@ -641,6 +641,7 @@ def submit_post(v, hole=None):
p.title_html = title_html
+ g.db.add(p)
body_html = sanitize(body, count_emojis=True, limit_pings=100, obj=p, author=v)
if v.hieroglyphs and not p.distinguished and marseyaward_body_regex.search(body_html):
@@ -684,7 +685,7 @@ def submit_post(v, hole=None):
copyfile(name, name2)
p.thumburl = process_image(name2, v, resize=199)
elif file.content_type.startswith('video/'):
- p.url, p.posterurl, name = process_video(file, v)
+ p.url, p.posterurl, name = process_video(file, v, post=p)
if p.posterurl:
name2 = name.replace('.webp', 'r.webp')
copyfile(name, name2)
@@ -693,9 +694,27 @@ def submit_post(v, hole=None):
p.url = process_audio(file, v)
else:
stop(415)
+ elif p.url and p.url.startswith(SITE_FULL_VIDEOS):
+     # split(prefix)[0] is always '' for a URL that starts with the prefix, so take the remainder instead.
+     # Assumes Media.filename starts with '/videos' (process_video builds its URL via new.split('/videos')[1]) -- TODO confirm.
+     filename = '/videos' + p.url.split(SITE_FULL_VIDEOS, 1)[1]
+     media = g.db.get(Media, filename)
+     if media:
+         media_usage = MediaUsage(
+             filename=filename,
+             post_id=p.id,
+         )
+         g.db.add(media_usage)
+         if media.posterurl:
+             p.posterurl = media.posterurl
if not p.draft and not complies_with_chud(p):
p.is_banned = True
+
+ for media_usage in p.media_usages:
+ media_usage.removed_utc = time.time()
+ g.db.add(media_usage)
+
p.ban_reason = "AutoJanny for lack of chud phrase"
body = random.choice(CHUD_MSGS).format(username=v.username, type='post', CHUD_PHRASE=v.chud_phrase)
@@ -802,6 +821,10 @@ def delete_post_pid(pid, v):
v.post_count -= 1
g.db.add(v)
+ for media_usage in p.media_usages:
+ media_usage.deleted_utc = p.deleted_utc
+ g.db.add(media_usage)
+
for sort in COMMENT_SORTS.keys():
cache.delete(f'post_{p.id}_{sort}')
@@ -830,6 +853,10 @@ def undelete_post_pid(pid, v):
v.post_count += 1
g.db.add(v)
+ for media_usage in p.media_usages:
+ media_usage.deleted_utc = None
+ g.db.add(media_usage)
+
for sort in COMMENT_SORTS.keys():
cache.delete(f'post_{p.id}_{sort}')
diff --git a/migrations/20241022-cleanup-videos.sql b/migrations/20241022-cleanup-videos.sql
new file mode 100644
index 000000000..1221cfd97
--- /dev/null
+++ b/migrations/20241022-cleanup-videos.sql
@@ -0,0 +1,39 @@
+-- Tracks which posts and comments reference each stored media file.
+-- deleted_utc / removed_utc are nullable timestamps; the application sets them
+-- when the owning content is deleted or removed and clears them on restore.
+-- NOTE(review): the ORM model declares filename as a foreign key to
+-- media.filename, but no such constraint is created here -- confirm whether
+-- that is intentional.
+create table media_usages (
+    id integer primary key,
+    filename character varying(55) NOT NULL,
+    post_id integer,
+    comment_id integer,
+    created_utc integer not null,
+    deleted_utc integer,
+    removed_utc integer
+);
+
+CREATE SEQUENCE public.media_usages_id_seq
+    AS integer
+    START WITH 1
+    INCREMENT BY 1
+    NO MINVALUE
+    NO MAXVALUE
+    CACHE 1;
+
+ALTER SEQUENCE public.media_usages_id_seq OWNED BY public.media_usages.id;
+
+ALTER TABLE ONLY public.media_usages ALTER COLUMN id SET DEFAULT nextval('public.media_usages_id_seq'::regclass);
+
+
+alter table media_usages
+    add constraint media_usages_post_fkey foreign key (post_id) references posts(id);
+
+alter table media_usages
+    add constraint media_usages_comment_fkey foreign key (comment_id) references comments(id);
+
+-- Usage rows are looked up by owning post, by owning comment and by filename.
+create index media_usages_post_id_idx on media_usages using btree (post_id);
+create index media_usages_comment_id_idx on media_usages using btree (comment_id);
+create index media_usages_filename_idx on media_usages using btree (filename);
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
index a18e01e29..4407595a0 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -13,6 +13,7 @@ gevent-websocket
gevent_inotifyx
greenlet
gunicorn
+humanize
imagehash
inotify
isodate