WIP: Comment Query Optimization (Eager Loading) #1

Closed
Snakes wants to merge 2 commits from <deleted>:comment-query-optimization into master
3 changed files with 111 additions and 1 deletion

View File

@ -1,3 +1,4 @@
from collections import defaultdict
from typing import Callable, Iterable, List, Optional, Union
from flask import *
@ -6,6 +7,7 @@ from sqlalchemy.orm import joinedload, selectinload, Query
from files.classes import Comment, CommentVote, Hat, Sub, Submission, User, UserBlock, Vote
from files.helpers.const import AUTOJANNY_ID
from files.helpers.sorting_and_time import sort_comment_results
def sanitize_username(username:str) -> str:
if not username: return username
@ -345,6 +347,79 @@ def get_comments_v_properties(v:User, include_shadowbanned=True, should_keep_fun
else: dump.append(comment)
return (comments, output)
def get_comment_trees_eager(
        top_comment_ids:Iterable[int],
        sort:str="old",
        v:Optional[User]=None) -> List[Comment]:
    """Load complete comment trees in one pass and attach replies in memory.

    Every comment whose ``top_comment_id`` is in ``top_comment_ids`` is
    fetched with a single query (plus the select-in loads configured below
    for authors, hats, flags, awards and options). Each loaded comment gets a
    ``replies2`` list holding its direct children, ordered by
    ``sort_comment_results(sort, ...)``.

    When a viewer ``v`` is given, each comment is additionally annotated with
    ``voted``, ``is_blocking`` and ``is_blocked`` taken from outer joins
    against the viewer's votes and block lists (``0`` when there is no
    matching row).

    Args:
        top_comment_ids: ids of the root comments whose trees are wanted.
            Any iterable is accepted; it is materialized once internally.
        sort: sort name forwarded to ``sort_comment_results`` for replies.
        v: the viewing user, or ``None`` for a logged-out viewer.

    Returns:
        The root comments, in the order given by ``top_comment_ids``. Ids
        whose comment was not returned by the query (for example because the
        shadowban filter removed it) are skipped rather than raising.
    """
    # The ids are needed twice (IN clause and final ordering), so a one-shot
    # iterable such as a generator must be materialized first.
    top_comment_ids = list(top_comment_ids)
    if not top_comment_ids:
        return []

    if v:
        votes = g.db.query(CommentVote).filter_by(user_id=v.id).subquery()
        blocking = v.blocking.subquery()
        blocked = v.blocked.subquery()
        query = g.db.query(
            Comment,
            votes.c.vote_type,
            blocking.c.target_id,
            blocked.c.target_id,
        ).join(
            votes, votes.c.comment_id == Comment.id, isouter=True
        ).join(
            blocking,
            blocking.c.target_id == Comment.author_id,
            isouter=True
        ).join(
            blocked,
            blocked.c.user_id == Comment.author_id,
            isouter=True
        )
    else:
        query = g.db.query(Comment)

    # Hide comments by shadowbanned authors from everyone EXCEPT viewers who
    # are allowed to see them (logged-out viewers never are).
    if not (v and v.can_see_shadowbanned):
        # `== None` is intentional: SQLAlchemy renders it as IS NULL.
        query = query.join(Comment.author).filter(User.shadowbanned == None)  # noqa: E711

    query = query.filter(Comment.top_comment_id.in_(top_comment_ids))
    query = query.options(
        selectinload(Comment.author).options(
            selectinload(User.hats_equipped.and_(Hat.equipped == True))  # noqa: E712
                .joinedload(Hat.hat_def, innerjoin=True),
            selectinload(User.sub_mods),
            selectinload(User.sub_exiles),
        ),
        selectinload(Comment.flags),
        selectinload(Comment.awards),
        selectinload(Comment.options),
    )
    results = query.all()

    if v:
        # Rows are (Comment, vote_type, blocking target_id, blocked target_id).
        comments = []
        for comment, vote_type, blocking_id, blocked_id in results:
            comment.voted = vote_type or 0
            comment.is_blocking = blocking_id or 0
            comment.is_blocked = blocked_id or 0
            comments.append(comment)
    else:
        comments = results

    comments_by_id = {}
    children_by_parent = defaultdict(list)
    for comment in comments:
        comment.replies2 = []
        comments_by_id[comment.id] = comment
        children_by_parent[comment.parent_comment_id].append(comment)

    for parent_id, children in children_by_parent.items():
        if parent_id is None:
            continue  # root comments have no parent to attach to
        parent = comments_by_id.get(parent_id)
        if parent is None:
            # The parent was not loaded (e.g. filtered out above); its
            # replies cannot be attached anywhere, so leave them out.
            continue
        parent.replies2 = sort_comment_results(sort, children)

    return [
        comments_by_id[tcid]
        for tcid in top_comment_ids
        if tcid in comments_by_id
    ]
def get_sub_by_name(sub:str, v:Optional[User]=None, graceful=False) -> Optional[Sub]:
if not sub:
if graceful: return None

View File

@ -48,6 +48,39 @@ def sort_objects(sort, objects, cls, include_shadowbanned=False):
else:
return objects.order_by(cls.downvotes - cls.upvotes, cls.created_utc.desc())
# Presently designed around files.helpers.get.get_comment_trees_eager
# Behavior should parallel that of sort_objects above. TODO: Unify someday?
def sort_comment_results(sort, comments):
    """Return ``comments`` as a new list ordered according to ``sort``.

    Recognized values of ``sort`` are "hot", "new", "old", "controversial"
    and "bottom"; any other value orders by highest net score first.
    Whatever the sort, comments with a non-None ``stickied`` value are placed
    ahead of those without one.

    Args:
        sort: name of the ordering to apply.
        comments: iterable of objects exposing ``created_utc``, ``upvotes``,
            ``downvotes`` and ``stickied`` (plus ``realupvotes`` when the
            "hot" sort runs with SITE_NAME equal to 'rDrama').

    Returns:
        A new sorted list; the input is not modified.
    """
    # 2**31 - 1. Subtracting a value from it reverses that value's order, so
    # an ascending sort yields descending results. Timestamps outgrow this
    # in 2038, so it needs changing before then.
    reverse_base = (2 << 30) - 1

    if sort == "hot":
        reference_time = int(time.time()) + 3600

        if SITE_NAME == 'rDrama':
            def score(comment):
                return comment.realupvotes
        else:
            def score(comment):
                return comment.upvotes - comment.downvotes

        def rank(comment):
            age = (reference_time - comment.created_utc) / 1000
            return (
                -1000000 * (score(comment) + 1) / pow(age, 1.23),
                reverse_base - comment.created_utc,
            )
    elif sort == "new":
        def rank(comment):
            return reverse_base - comment.created_utc
    elif sort == "old":
        def rank(comment):
            return comment.created_utc
    elif sort == "controversial":
        def rank(comment):
            ups = comment.upvotes + 1
            downs = comment.downvotes + 1
            return (
                ups / downs + downs / ups,
                reverse_base - comment.downvotes,
                reverse_base - comment.created_utc,
            )
    elif sort == "bottom":
        def rank(comment):
            return (
                comment.upvotes - comment.downvotes,
                reverse_base - comment.created_utc,
            )
    else:
        def rank(comment):
            return (
                comment.downvotes - comment.upvotes,
                reverse_base - comment.created_utc,
            )

    def pinned_then_rank(comment):
        pin = comment.stickied
        # The leading booleans push None (and then '') after real values and
        # ensure None is never ordered against a string.
        return ((pin is None, pin == '', pin), rank(comment))

    return sorted(comments, key=pinned_then_rank)
def make_age_string(compare:Optional[int]) -> str:
if not compare or compare < 1577865600: return ""
age = int(time.time()) - compare

View File

@ -212,7 +212,9 @@ def post_id(pid, anything=None, v=None, sub=None):
else:
pinned2[pin] = ''
post.replies = list(pinned2.keys()) + comments
top_comments = list(pinned2.keys()) + comments
top_comment_ids = [c.id for c in top_comments]
post.replies = get_comment_trees_eager(top_comment_ids, sort, v)
post.views += 1
g.db.add(post)